diff options
author | Araq <rumpf_a@web.de> | 2011-04-12 01:13:42 +0200 |
---|---|---|
committer | Araq <rumpf_a@web.de> | 2011-04-12 01:13:42 +0200 |
commit | cd292568d775d55d9abb51e962882ecda12c03a9 (patch) | |
tree | 85451f0e1f17dc0463350915f12bdd0a82a73455 /nim/paslex.pas | |
parent | 46c41e43690cba9bc1caff6a994bb6915df8a1b7 (diff) | |
download | Nim-cd292568d775d55d9abb51e962882ecda12c03a9.tar.gz |
big repo cleanup
Diffstat (limited to 'nim/paslex.pas')
-rwxr-xr-x | nim/paslex.pas | 738 |
1 files changed, 0 insertions, 738 deletions
diff --git a/nim/paslex.pas b/nim/paslex.pas deleted file mode 100755 index f3d8daaeb..000000000 --- a/nim/paslex.pas +++ /dev/null @@ -1,738 +0,0 @@ -// -// -// The Nimrod Compiler -// (c) Copyright 2009 Andreas Rumpf -// -// See the file "copying.txt", included in this -// distribution, for details about the copyright. -// -unit paslex; - -// This module implements a FreePascal scanner. This is a adaption from -// the scanner module. - -interface - -{$include 'config.inc'} - -uses - charsets, nsystem, sysutils, - nhashes, options, msgs, strutils, platform, idents, - lexbase, wordrecg, scanner; - -const - MaxLineLength = 80; // lines longer than this lead to a warning - - numChars: TCharSet = ['0'..'9','a'..'z','A'..'Z']; // we support up to base 36 - SymChars: TCharSet = ['a'..'z', 'A'..'Z', '0'..'9', #128..#255]; - SymStartChars: TCharSet = ['a'..'z', 'A'..'Z', #128..#255]; - OpChars: TCharSet = ['+', '-', '*', '/', '<', '>', '!', '?', '^', '.', - '|', '=', ':', '%', '&', '$', '@', '~', #128..#255]; - -type - // order is important for TPasTokKind - TPasTokKind = (pxInvalid, pxEof, - // keywords: - //[[[cog - //from string import capitalize - //keywords = eval(open("data/pas_keyw.yml").read()) - //idents = "" - //strings = "" - //i = 1 - //for k in keywords: - // idents = idents + "px" + capitalize(k) + ", " - // strings = strings + "'" + k + "', " - // if i % 4 == 0: - // idents = idents + "\n" - // strings = strings + "\n" - // i = i + 1 - //cog.out(idents) - //]]] - pxAnd, pxArray, pxAs, pxAsm, - pxBegin, pxCase, pxClass, pxConst, - pxConstructor, pxDestructor, pxDiv, pxDo, - pxDownto, pxElse, pxEnd, pxExcept, - pxExports, pxFinalization, pxFinally, pxFor, - pxFunction, pxGoto, pxIf, pxImplementation, - pxIn, pxInherited, pxInitialization, pxInline, - pxInterface, pxIs, pxLabel, pxLibrary, - pxMod, pxNil, pxNot, pxObject, - pxOf, pxOr, pxOut, pxPacked, - pxProcedure, pxProgram, pxProperty, pxRaise, - pxRecord, pxRepeat, pxResourcestring, pxSet, - pxShl, pxShr, pxThen, pxThreadvar, - pxTo, pxTry, pxType, pxUnit, - pxUntil, pxUses, pxVar, pxWhile, - pxWith, pxXor, - //[[[end]]] - pxComment, // ordinary comment - pxCommand, // {@} - pxAmp, // {&} - pxPer, // {%} - pxStrLit, - pxSymbol, // a symbol - - pxIntLit, - pxInt64Lit, // long constant like 0x00000070fffffff or out of int range - pxFloatLit, - - pxParLe, pxParRi, pxBracketLe, pxBracketRi, - pxComma, pxSemiColon, pxColon, - - // operators - pxAsgn, - pxEquals, pxDot, pxDotDot, pxHat, pxPlus, pxMinus, pxStar, pxSlash, - pxLe, pxLt, pxGe, pxGt, pxNeq, pxAt, - - pxStarDirLe, - pxStarDirRi, - pxCurlyDirLe, - pxCurlyDirRi - ); - TPasTokKinds = set of TPasTokKind; -const - PasTokKindToStr: array [TPasTokKind] of string = ( - 'pxInvalid', '[EOF]', - //[[[cog - //cog.out(strings) - //]]] - 'and', 'array', 'as', 'asm', - 'begin', 'case', 'class', 'const', - 'constructor', 'destructor', 'div', 'do', - 'downto', 'else', 'end', 'except', - 'exports', 'finalization', 'finally', 'for', - 'function', 'goto', 'if', 'implementation', - 'in', 'inherited', 'initialization', 'inline', - 'interface', 'is', 'label', 'library', - 'mod', 'nil', 'not', 'object', - 'of', 'or', 'out', 'packed', - 'procedure', 'program', 'property', 'raise', - 'record', 'repeat', 'resourcestring', 'set', - 'shl', 'shr', 'then', 'threadvar', - 'to', 'try', 'type', 'unit', - 'until', 'uses', 'var', 'while', - 'with', 'xor', - //[[[end]]] - 'pxComment', 'pxCommand', - '{&}', '{%}', 'pxStrLit', '[IDENTIFIER]', 'pxIntLit', 'pxInt64Lit', - 'pxFloatLit', - '('+'', ')'+'', '['+'', ']'+'', - ','+'', ';'+'', ':'+'', - ':=', '='+'', '.'+'', '..', '^'+'', '+'+'', '-'+'', '*'+'', '/'+'', - '<=', '<'+'', '>=', '>'+'', '<>', '@'+'', '(*$', '*)', '{$', '}'+'' - ); - -type - TPasTok = object(TToken) // a Pascal token - xkind: TPasTokKind; // the type of the token - end; - - TPasLex = object(TLexer) - end; - -procedure getPasTok(var L: TPasLex; out tok: TPasTok); - -procedure PrintPasTok(const tok: TPasTok); -function pasTokToStr(const tok: TPasTok): string; - -implementation - -function pastokToStr(const tok: TPasTok): string; -begin - case tok.xkind of - pxIntLit, pxInt64Lit: - result := toString(tok.iNumber); - pxFloatLit: - result := toStringF(tok.fNumber); - pxInvalid, pxComment..pxStrLit: - result := tok.literal; - else if (tok.ident.s <> '') then - result := tok.ident.s - else - result := pasTokKindToStr[tok.xkind]; - end -end; - -procedure PrintPasTok(const tok: TPasTok); -begin - write(output, pasTokKindToStr[tok.xkind]); - write(output, ' '); - writeln(output, pastokToStr(tok)) -end; - -// ---------------------------------------------------------------------------- - -procedure setKeyword(var L: TPasLex; var tok: TPasTok); -begin - case tok.ident.id of - //[[[cog - //for k in keywords: - // m = capitalize(k) - // cog.outl("ord(w%s):%s tok.xkind := px%s;" % (m, ' '*(18-len(m)), m)) - //]]] - ord(wAnd): tok.xkind := pxAnd; - ord(wArray): tok.xkind := pxArray; - ord(wAs): tok.xkind := pxAs; - ord(wAsm): tok.xkind := pxAsm; - ord(wBegin): tok.xkind := pxBegin; - ord(wCase): tok.xkind := pxCase; - ord(wClass): tok.xkind := pxClass; - ord(wConst): tok.xkind := pxConst; - ord(wConstructor): tok.xkind := pxConstructor; - ord(wDestructor): tok.xkind := pxDestructor; - ord(wDiv): tok.xkind := pxDiv; - ord(wDo): tok.xkind := pxDo; - ord(wDownto): tok.xkind := pxDownto; - ord(wElse): tok.xkind := pxElse; - ord(wEnd): tok.xkind := pxEnd; - ord(wExcept): tok.xkind := pxExcept; - ord(wExports): tok.xkind := pxExports; - ord(wFinalization): tok.xkind := pxFinalization; - ord(wFinally): tok.xkind := pxFinally; - ord(wFor): tok.xkind := pxFor; - ord(wFunction): tok.xkind := pxFunction; - ord(wGoto): tok.xkind := pxGoto; - ord(wIf): tok.xkind := pxIf; - ord(wImplementation): tok.xkind := pxImplementation; - ord(wIn): tok.xkind := pxIn; - ord(wInherited): tok.xkind := pxInherited; - ord(wInitialization): tok.xkind := pxInitialization; - ord(wInline): tok.xkind := pxInline; - ord(wInterface): tok.xkind := pxInterface; - ord(wIs): tok.xkind := pxIs; - ord(wLabel): tok.xkind := pxLabel; - ord(wLibrary): tok.xkind := pxLibrary; - ord(wMod): tok.xkind := pxMod; - ord(wNil): tok.xkind := pxNil; - ord(wNot): tok.xkind := pxNot; - ord(wObject): tok.xkind := pxObject; - ord(wOf): tok.xkind := pxOf; - ord(wOr): tok.xkind := pxOr; - ord(wOut): tok.xkind := pxOut; - ord(wPacked): tok.xkind := pxPacked; - ord(wProcedure): tok.xkind := pxProcedure; - ord(wProgram): tok.xkind := pxProgram; - ord(wProperty): tok.xkind := pxProperty; - ord(wRaise): tok.xkind := pxRaise; - ord(wRecord): tok.xkind := pxRecord; - ord(wRepeat): tok.xkind := pxRepeat; - ord(wResourcestring): tok.xkind := pxResourcestring; - ord(wSet): tok.xkind := pxSet; - ord(wShl): tok.xkind := pxShl; - ord(wShr): tok.xkind := pxShr; - ord(wThen): tok.xkind := pxThen; - ord(wThreadvar): tok.xkind := pxThreadvar; - ord(wTo): tok.xkind := pxTo; - ord(wTry): tok.xkind := pxTry; - ord(wType): tok.xkind := pxType; - ord(wUnit): tok.xkind := pxUnit; - ord(wUntil): tok.xkind := pxUntil; - ord(wUses): tok.xkind := pxUses; - ord(wVar): tok.xkind := pxVar; - ord(wWhile): tok.xkind := pxWhile; - ord(wWith): tok.xkind := pxWith; - ord(wXor): tok.xkind := pxXor; - //[[[end]]] - else tok.xkind := pxSymbol - end -end; - - -// ---------------------------------------------------------------------------- - -procedure matchUnderscoreChars(var L: TPasLex; var tok: TPasTok; - const chars: TCharSet); -// matches ([chars]_)* -var - pos: int; - buf: PChar; -begin - pos := L.bufpos; // use registers for pos, buf - buf := L.buf; - repeat - if buf[pos] in chars then begin - addChar(tok.literal, buf[pos]); - Inc(pos) - end - else break; - if buf[pos] = '_' then begin - addChar(tok.literal, '_'); - Inc(pos); - end; - until false; - L.bufPos := pos; -end; - -function isFloatLiteral(const s: string): boolean; -var - i: int; -begin - for i := strStart to length(s)+strStart-1 do - if s[i] in ['.','e','E'] then begin - result := true; exit - end; - result := false -end; - -procedure getNumber2(var L: TPasLex; var tok: TPasTok); -var - pos, bits: int; - xi: biggestInt; -begin - pos := L.bufpos+1; // skip % - if not (L.buf[pos] in ['0'..'1']) then begin // BUGFIX for %date% - tok.xkind := pxInvalid; - addChar(tok.literal, '%'); - inc(L.bufpos); - exit; - end; - - tok.base := base2; - xi := 0; - bits := 0; - while true do begin - case L.buf[pos] of - 'A'..'Z', 'a'..'z', '2'..'9', '.': begin - lexMessage(L, errInvalidNumber); - inc(pos) - end; - '_': inc(pos); - '0', '1': begin - xi := shlu(xi, 1) or (ord(L.buf[pos]) - ord('0')); - inc(pos); - inc(bits); - end; - else break; - end - end; - tok.iNumber := xi; - if (bits > 32) then //or (xi < low(int32)) or (xi > high(int32)) then - tok.xkind := pxInt64Lit - else - tok.xkind := pxIntLit; - L.bufpos := pos; -end; - -procedure getNumber16(var L: TPasLex; var tok: TPasTok); -var - pos, bits: int; - xi: biggestInt; -begin - pos := L.bufpos+1; // skip $ - tok.base := base16; - xi := 0; - bits := 0; - while true do begin - case L.buf[pos] of - 'G'..'Z', 'g'..'z', '.': begin - lexMessage(L, errInvalidNumber); - inc(pos); - end; - '_': inc(pos); - '0'..'9': begin - xi := shlu(xi, 4) or (ord(L.buf[pos]) - ord('0')); - inc(pos); - inc(bits, 4); - end; - 'a'..'f': begin - xi := shlu(xi, 4) or (ord(L.buf[pos]) - ord('a') + 10); - inc(pos); - inc(bits, 4); - end; - 'A'..'F': begin - xi := shlu(xi, 4) or (ord(L.buf[pos]) - ord('A') + 10); - inc(pos); - inc(bits, 4); - end; - else break; - end - end; - tok.iNumber := xi; - if (bits > 32) then // (xi < low(int32)) or (xi > high(int32)) then - tok.xkind := pxInt64Lit - else - tok.xkind := pxIntLit; - L.bufpos := pos; -end; - -procedure getNumber10(var L: TPasLex; var tok: TPasTok); -begin - tok.base := base10; - matchUnderscoreChars(L, tok, ['0'..'9']); - if (L.buf[L.bufpos] = '.') and (L.buf[L.bufpos+1] in ['0'..'9']) then begin - addChar(tok.literal, '.'); - inc(L.bufpos); - matchUnderscoreChars(L, tok, ['e', 'E', '+', '-', '0'..'9']) - end; - try - if isFloatLiteral(tok.literal) then begin - tok.fnumber := parseFloat(tok.literal); - tok.xkind := pxFloatLit; - end - else begin - tok.iNumber := ParseInt(tok.literal); - if (tok.iNumber < low(int32)) or (tok.iNumber > high(int32)) then - tok.xkind := pxInt64Lit - else - tok.xkind := pxIntLit; - end; - except - on EInvalidValue do - lexMessage(L, errInvalidNumber, tok.literal); - on EOverflow do - lexMessage(L, errNumberOutOfRange, tok.literal); - {@ignore} - on sysutils.EIntOverflow do - lexMessage(L, errNumberOutOfRange, tok.literal); - {@emit} - end; -end; - -function HandleCRLF(var L: TLexer; pos: int): int; -begin - case L.buf[pos] of - CR: result := lexbase.HandleCR(L, pos); - LF: result := lexbase.HandleLF(L, pos); - else result := pos - end -end; - -procedure getString(var L: TPasLex; var tok: TPasTok); -var - pos, xi: int; - buf: PChar; -begin - pos := L.bufPos; - buf := L.buf; - while true do begin - if buf[pos] = '''' then begin - inc(pos); - while true do begin - case buf[pos] of - CR, LF, lexbase.EndOfFile: begin - lexMessage(L, errClosingQuoteExpected); - break - end; - '''': begin - inc(pos); - if buf[pos] = '''' then begin - inc(pos); - addChar(tok.literal, ''''); - end - else break; - end; - else begin - addChar(tok.literal, buf[pos]); - inc(pos); - end - end - end - end - else if buf[pos] = '#' then begin - inc(pos); - xi := 0; - case buf[pos] of - '$': begin - inc(pos); - xi := 0; - while true do begin - case buf[pos] of - '0'..'9': xi := (xi shl 4) or (ord(buf[pos]) - ord('0')); - 'a'..'f': xi := (xi shl 4) or (ord(buf[pos]) - ord('a') + 10); - 'A'..'F': xi := (xi shl 4) or (ord(buf[pos]) - ord('A') + 10); - else break; - end; - inc(pos) - end - end; - '0'..'9': begin - xi := 0; - while buf[pos] in ['0'..'9'] do begin - xi := (xi * 10) + (ord(buf[pos]) - ord('0')); - inc(pos); - end; - end - else lexMessage(L, errInvalidCharacterConstant) - end; - if (xi <= 255) then - addChar(tok.literal, Chr(xi)) - else - lexMessage(L, errInvalidCharacterConstant) - end - else break - end; - tok.xkind := pxStrLit; - L.bufpos := pos; -end; - -{@ignore} -{$ifopt Q+} {$define Q_on} {$Q-} {$endif} -{$ifopt R+} {$define R_on} {$R-} {$endif} -{@emit} -procedure getSymbol(var L: TPasLex; var tok: TPasTok); -var - pos: int; - c: Char; - buf: pchar; - h: THash; // hashing algorithm inlined -begin - h := 0; - pos := L.bufpos; - buf := L.buf; - while true do begin - c := buf[pos]; - case c of - 'a'..'z', '0'..'9', #128..#255: begin - h := h +{%} Ord(c); - h := h +{%} h shl 10; - h := h xor (h shr 6) - end; - 'A'..'Z': begin - c := chr(ord(c) + (ord('a')-ord('A'))); // toLower() - h := h +{%} Ord(c); - h := h +{%} h shl 10; - h := h xor (h shr 6) - end; - '_': begin end; - else break - end; - Inc(pos) - end; - h := h +{%} h shl 3; - h := h xor (h shr 11); - h := h +{%} h shl 15; - tok.ident := getIdent(addr(L.buf[L.bufpos]), pos-L.bufpos, h); - L.bufpos := pos; - setKeyword(L, tok); -end; -{@ignore} -{$ifdef Q_on} {$undef Q_on} {$Q+} {$endif} -{$ifdef R_on} {$undef R_on} {$R+} {$endif} -{@emit} - -procedure scanLineComment(var L: TPasLex; var tok: TPasTok); -var - buf: PChar; - pos, col: int; - indent: int; -begin - pos := L.bufpos; - buf := L.buf; - // a comment ends if the next line does not start with the // on the same - // column after only whitespace - tok.xkind := pxComment; - col := getColNumber(L, pos); - while true do begin - inc(pos, 2); // skip // - addChar(tok.literal, '#'); - while not (buf[pos] in [CR, LF, lexbase.EndOfFile]) do begin - addChar(tok.literal, buf[pos]); inc(pos); - end; - pos := handleCRLF(L, pos); - buf := L.buf; - indent := 0; - while buf[pos] = ' ' do begin inc(pos); inc(indent) end; - if (col = indent) and (buf[pos] = '/') and (buf[pos+1] = '/') then - tok.literal := tok.literal +{&} nl - else - break - end; - L.bufpos := pos; -end; - -procedure scanCurlyComment(var L: TPasLex; var tok: TPasTok); -var - buf: PChar; - pos: int; -begin - pos := L.bufpos; - buf := L.buf; - tok.literal := '#'+''; - tok.xkind := pxComment; - repeat - case buf[pos] of - CR, LF: begin - pos := HandleCRLF(L, pos); - buf := L.buf; - tok.literal := tok.literal +{&} nl + '#'; - end; - '}': begin inc(pos); break end; - lexbase.EndOfFile: lexMessage(L, errTokenExpected, '}'+''); - else begin - addChar(tok.literal, buf[pos]); - inc(pos) - end - end - until false; - L.bufpos := pos; -end; - -procedure scanStarComment(var L: TPasLex; var tok: TPasTok); -var - buf: PChar; - pos: int; -begin - pos := L.bufpos; - buf := L.buf; - tok.literal := '#'+''; - tok.xkind := pxComment; - repeat - case buf[pos] of - CR, LF: begin - pos := HandleCRLF(L, pos); - buf := L.buf; - tok.literal := tok.literal +{&} nl + '#'; - end; - '*': begin - inc(pos); - if buf[pos] = ')' then begin inc(pos); break end - else addChar(tok.literal, '*') - end; - lexbase.EndOfFile: lexMessage(L, errTokenExpected, '*)'); - else begin - addChar(tok.literal, buf[pos]); - inc(pos) - end - end - until false; - L.bufpos := pos; -end; - -procedure skip(var L: TPasLex; var tok: TPasTok); -var - buf: PChar; - pos: int; -begin - pos := L.bufpos; - buf := L.buf; - repeat - case buf[pos] of - ' ', Tabulator: Inc(pos); - // newline is special: - CR, LF: begin - pos := HandleCRLF(L, pos); - buf := L.buf; - end - else break // EndOfFile also leaves the loop - end - until false; - L.bufpos := pos; -end; - -procedure getPasTok(var L: TPasLex; out tok: TPasTok); -var - c: Char; -begin - tok.xkind := pxInvalid; - fillToken(tok); - skip(L, tok); - c := L.buf[L.bufpos]; - if c in SymStartChars then // common case first - getSymbol(L, tok) - else if c in ['0'..'9'] then - getNumber10(L, tok) - else begin - case c of - ';': begin tok.xkind := pxSemicolon; Inc(L.bufpos) end; - '/': begin - if L.buf[L.bufpos+1] = '/' then scanLineComment(L, tok) - else begin tok.xkind := pxSlash; inc(L.bufpos) end; - end; - ',': begin tok.xkind := pxComma; Inc(L.bufpos) end; - '(': begin - Inc(L.bufpos); - if (L.buf[L.bufPos] = '*') then begin - if (L.buf[L.bufPos+1] = '$') then begin - Inc(L.bufpos, 2); - skip(L, tok); - getSymbol(L, tok); - tok.xkind := pxStarDirLe; - end - else begin - inc(L.bufpos); - scanStarComment(L, tok) - end - end - else - tok.xkind := pxParLe; - end; - '*': begin - inc(L.bufpos); - if L.buf[L.bufpos] = ')' then begin - inc(L.bufpos); tok.xkind := pxStarDirRi - end - else tok.xkind := pxStar - end; - ')': begin tok.xkind := pxParRi; Inc(L.bufpos) end; - '[': begin Inc(L.bufpos); tok.xkind := pxBracketLe end; - ']': begin Inc(L.bufpos); tok.xkind := pxBracketRi end; - '.': begin - inc(L.bufpos); - if L.buf[L.bufpos] = '.' then begin - tok.xkind := pxDotDot; inc(L.bufpos) - end - else tok.xkind := pxDot - end; - '{': begin - Inc(L.bufpos); - case L.buf[L.bufpos] of - '$': begin - Inc(L.bufpos); - skip(L, tok); - getSymbol(L, tok); - tok.xkind := pxCurlyDirLe - end; - '&': begin Inc(L.bufpos); tok.xkind := pxAmp end; - '%': begin Inc(L.bufpos); tok.xkind := pxPer end; - '@': begin Inc(L.bufpos); tok.xkind := pxCommand end; - else scanCurlyComment(L, tok); - end; - end; - '+': begin tok.xkind := pxPlus; inc(L.bufpos) end; - '-': begin tok.xkind := pxMinus; inc(L.bufpos) end; - ':': begin - inc(L.bufpos); - if L.buf[L.bufpos] = '=' then begin - inc(L.bufpos); tok.xkind := pxAsgn; - end - else tok.xkind := pxColon - end; - '<': begin - inc(L.bufpos); - if L.buf[L.bufpos] = '>' then begin - inc(L.bufpos); - tok.xkind := pxNeq - end - else if L.buf[L.bufpos] = '=' then begin - inc(L.bufpos); - tok.xkind := pxLe - end - else tok.xkind := pxLt - end; - '>': begin - inc(L.bufpos); - if L.buf[L.bufpos] = '=' then begin - inc(L.bufpos); - tok.xkind := pxGe - end - else tok.xkind := pxGt - end; - '=': begin tok.xkind := pxEquals; inc(L.bufpos) end; - '@': begin tok.xkind := pxAt; inc(L.bufpos) end; - '^': begin tok.xkind := pxHat; inc(L.bufpos) end; - '}': begin tok.xkind := pxCurlyDirRi; Inc(L.bufpos) end; - '''', '#': getString(L, tok); - '$': getNumber16(L, tok); - '%': getNumber2(L, tok); - lexbase.EndOfFile: tok.xkind := pxEof; - else begin - tok.literal := c + ''; - tok.xkind := pxInvalid; - lexMessage(L, errInvalidToken, c + ' (\' +{&} toString(ord(c)) + ')'); - Inc(L.bufpos); - end - end - end -end; - -end. |