diff options
Diffstat (limited to 'rod/pas2nim/paslex.nim')
-rwxr-xr-x | rod/pas2nim/paslex.nim | 570 |
1 files changed, 0 insertions, 570 deletions
diff --git a/rod/pas2nim/paslex.nim b/rod/pas2nim/paslex.nim deleted file mode 100755 index ed554bdc2..000000000 --- a/rod/pas2nim/paslex.nim +++ /dev/null @@ -1,570 +0,0 @@ -# -# -# Pas2nim - Pascal to Nimrod source converter -# (c) Copyright 2010 Andreas Rumpf -# -# See the file "copying.txt", included in this -# distribution, for details about the copyright. -# - -# This module implements a FreePascal scanner. This is an adaption from -# the scanner module. - -import - nhashes, options, msgs, strutils, platform, idents, lexbase, llstream - -const - MaxLineLength* = 80 # lines longer than this lead to a warning - numChars*: TCharSet = {'0'..'9', 'a'..'z', 'A'..'Z'} - SymChars*: TCharSet = {'a'..'z', 'A'..'Z', '0'..'9', '\x80'..'\xFF'} - SymStartChars*: TCharSet = {'a'..'z', 'A'..'Z', '\x80'..'\xFF'} - OpChars*: TCharSet = {'+', '-', '*', '/', '<', '>', '!', '?', '^', '.', '|', - '=', ':', '%', '&', '$', '@', '~', '\x80'..'\xFF'} - -# keywords are sorted! - -type - TTokKind* = enum - pxInvalid, pxEof, - pxAnd, pxArray, pxAs, pxAsm, pxBegin, pxCase, pxClass, pxConst, - pxConstructor, pxDestructor, pxDiv, pxDo, pxDownto, pxElse, pxEnd, pxExcept, - pxExports, pxFinalization, pxFinally, pxFor, pxFunction, pxGoto, pxIf, - pxImplementation, pxIn, pxInherited, pxInitialization, pxInline, - pxInterface, pxIs, pxLabel, pxLibrary, pxMod, pxNil, pxNot, pxObject, pxOf, - pxOr, pxOut, pxPacked, pxProcedure, pxProgram, pxProperty, pxRaise, - pxRecord, pxRepeat, pxResourcestring, pxSet, pxShl, pxShr, pxThen, - pxThreadvar, pxTo, pxTry, pxType, pxUnit, pxUntil, pxUses, pxVar, pxWhile, - pxWith, pxXor, - pxComment, # ordinary comment - pxCommand, # {@} - pxAmp, # {&} - pxPer, # {%} - pxStrLit, pxSymbol, # a symbol - pxIntLit, pxInt64Lit, # long constant like 0x70fffffff or out of int range - pxFloatLit, pxParLe, pxParRi, pxBracketLe, pxBracketRi, pxComma, - pxSemiColon, pxColon, # operators - pxAsgn, pxEquals, pxDot, pxDotDot, pxHat, pxPlus, pxMinus, pxStar, pxSlash, - pxLe, pxLt, pxGe, pxGt, pxNeq, pxAt, pxStarDirLe, pxStarDirRi, pxCurlyDirLe, - pxCurlyDirRi - TTokKinds* = set[TTokKind] - -const - Keywords = ["and", "array", "as", "asm", "begin", "case", "class", "const", - "constructor", "destructor", "div", "do", "downto", "else", "end", "except", - "exports", "finalization", "finally", "for", "function", "goto", "if", - "implementation", "in", "inherited", "initialization", "inline", - "interface", "is", "label", "library", "mod", "nil", "not", "object", "of", - "or", "out", "packed", "procedure", "program", "property", "raise", - "record", "repeat", "resourcestring", "set", "shl", "shr", "then", - "threadvar", "to", "try", "type", "unit", "until", "uses", "var", "while", - "with", "xor"] - - firstKeyword = pxAnd - lastKeyword = pxXor - -type - TNumericalBase* = enum base10, base2, base8, base16 - TToken* = object - xkind*: TTokKind # the type of the token - ident*: PIdent # the parsed identifier - iNumber*: BiggestInt # the parsed integer literal - fNumber*: BiggestFloat # the parsed floating point literal - base*: TNumericalBase # the numerical base; only valid for int - # or float literals - literal*: string # the parsed (string) literal - - TLexer* = object of TBaseLexer - filename*: string - - -proc getTok*(L: var TLexer, tok: var TToken) -proc PrintTok*(tok: TToken) -proc `$`*(tok: TToken): string -# implementation - -var - dummyIdent: PIdent - gLinesCompiled: int - -proc fillToken(L: var TToken) = - L.xkind = pxInvalid - L.iNumber = 0 - L.literal = "" - L.fNumber = 0.0 - L.base = base10 - L.ident = dummyIdent # this prevents many bugs! - -proc openLexer*(lex: var TLexer, filename: string, inputstream: PLLStream) = - openBaseLexer(lex, inputstream) - lex.filename = filename - -proc closeLexer*(lex: var TLexer) = - inc(gLinesCompiled, lex.LineNumber) - closeBaseLexer(lex) - -proc getColumn(L: TLexer): int = - result = getColNumber(L, L.bufPos) - -proc getLineInfo*(L: TLexer): TLineInfo = - result = newLineInfo(L.filename, L.linenumber, getColNumber(L, L.bufpos)) - -proc lexMessage*(L: TLexer, msg: TMsgKind, arg = "") = - msgs.GenericMessage(getLineInfo(L), msg, arg) - -proc lexMessagePos(L: var TLexer, msg: TMsgKind, pos: int, arg = "") = - var info = newLineInfo(L.filename, L.linenumber, pos - L.lineStart) - msgs.GenericMessage(info, msg, arg) - -proc TokKindToStr*(k: TTokKind): string = - case k - of pxEof: result = "[EOF]" - of firstKeyword..lastKeyword: - result = keywords[ord(k)-ord(firstKeyword)] - of pxInvalid, pxComment, pxStrLit: result = "string literal" - of pxCommand: result = "{@" - of pxAmp: result = "{&" - of pxPer: result = "{%" - of pxSymbol: result = "identifier" - of pxIntLit, pxInt64Lit: result = "integer literal" - of pxFloatLit: result = "floating point literal" - of pxParLe: result = "(" - of pxParRi: result = ")" - of pxBracketLe: result = "[" - of pxBracketRi: result = "]" - of pxComma: result = "," - of pxSemiColon: result = ";" - of pxColon: result = ":" - of pxAsgn: result = ":=" - of pxEquals: result = "=" - of pxDot: result = "." - of pxDotDot: result = ".." - of pxHat: result = "^" - of pxPlus: result = "+" - of pxMinus: result = "-" - of pxStar: result = "*" - of pxSlash: result = "/" - of pxLe: result = "<=" - of pxLt: result = "<" - of pxGe: result = ">=" - of pxGt: result = ">" - of pxNeq: result = "<>" - of pxAt: result = "@" - of pxStarDirLe: result = "(*$" - of pxStarDirRi: result = "*)" - of pxCurlyDirLe: result = "{$" - of pxCurlyDirRi: result = "}" - -proc `$`(tok: TToken): string = - case tok.xkind - of pxInvalid, pxComment, pxStrLit: result = tok.literal - of pxSymbol: result = tok.ident.s - of pxIntLit, pxInt64Lit: result = $tok.iNumber - of pxFloatLit: result = $tok.fNumber - else: result = TokKindToStr(tok.xkind) - -proc PrintTok(tok: TToken) = - writeln(stdout, $tok) - -proc setKeyword(L: var TLexer, tok: var TToken) = - var x = binaryStrSearch(keywords, toLower(tok.ident.s)) - if x < 0: tok.xkind = pxSymbol - else: tok.xKind = TTokKind(x + ord(firstKeyword)) - -proc matchUnderscoreChars(L: var TLexer, tok: var TToken, chars: TCharSet) = - # matches ([chars]_)* - var pos = L.bufpos # use registers for pos, buf - var buf = L.buf - while true: - if buf[pos] in chars: - add(tok.literal, buf[pos]) - Inc(pos) - else: - break - if buf[pos] == '_': - add(tok.literal, '_') - Inc(pos) - L.bufPos = pos - -proc isFloatLiteral(s: string): bool = - for i in countup(0, len(s)-1): - if s[i] in {'.', 'e', 'E'}: - return true - -proc getNumber2(L: var TLexer, tok: var TToken) = - var pos = L.bufpos + 1 # skip % - if not (L.buf[pos] in {'0'..'1'}): - # BUGFIX for %date% - tok.xkind = pxInvalid - add(tok.literal, '%') - inc(L.bufpos) - return - tok.base = base2 - var xi: biggestInt = 0 - var bits = 0 - while true: - case L.buf[pos] - of 'A'..'Z', 'a'..'z', '2'..'9', '.': - lexMessage(L, errInvalidNumber) - inc(pos) - of '_': - inc(pos) - of '0', '1': - xi = `shl`(xi, 1) or (ord(L.buf[pos]) - ord('0')) - inc(pos) - inc(bits) - else: break - tok.iNumber = xi - if (bits > 32): tok.xkind = pxInt64Lit - else: tok.xkind = pxIntLit - L.bufpos = pos - -proc getNumber16(L: var TLexer, tok: var TToken) = - var pos = L.bufpos + 1 # skip $ - tok.base = base16 - var xi: biggestInt = 0 - var bits = 0 - while true: - case L.buf[pos] - of 'G'..'Z', 'g'..'z', '.': - lexMessage(L, errInvalidNumber) - inc(pos) - of '_': inc(pos) - of '0'..'9': - xi = `shl`(xi, 4) or (ord(L.buf[pos]) - ord('0')) - inc(pos) - inc(bits, 4) - of 'a'..'f': - xi = `shl`(xi, 4) or (ord(L.buf[pos]) - ord('a') + 10) - inc(pos) - inc(bits, 4) - of 'A'..'F': - xi = `shl`(xi, 4) or (ord(L.buf[pos]) - ord('A') + 10) - inc(pos) - inc(bits, 4) - else: break - tok.iNumber = xi - if (bits > 32): - tok.xkind = pxInt64Lit - else: - tok.xkind = pxIntLit - L.bufpos = pos - -proc getNumber10(L: var TLexer, tok: var TToken) = - tok.base = base10 - matchUnderscoreChars(L, tok, {'0'..'9'}) - if (L.buf[L.bufpos] == '.') and (L.buf[L.bufpos + 1] in {'0'..'9'}): - add(tok.literal, '.') - inc(L.bufpos) - matchUnderscoreChars(L, tok, {'e', 'E', '+', '-', '0'..'9'}) - try: - if isFloatLiteral(tok.literal): - tok.fnumber = parseFloat(tok.literal) - tok.xkind = pxFloatLit - else: - tok.iNumber = ParseInt(tok.literal) - if (tok.iNumber < low(int32)) or (tok.iNumber > high(int32)): - tok.xkind = pxInt64Lit - else: - tok.xkind = pxIntLit - except EInvalidValue: - lexMessage(L, errInvalidNumber, tok.literal) - except EOverflow: - lexMessage(L, errNumberOutOfRange, tok.literal) - -proc HandleCRLF(L: var TLexer, pos: int): int = - case L.buf[pos] - of CR: result = lexbase.HandleCR(L, pos) - of LF: result = lexbase.HandleLF(L, pos) - else: result = pos - -proc getString(L: var TLexer, tok: var TToken) = - var xi: int - var pos = L.bufPos - var buf = L.buf - while true: - if buf[pos] == '\'': - inc(pos) - while true: - case buf[pos] - of CR, LF, lexbase.EndOfFile: - lexMessage(L, errClosingQuoteExpected) - break - of '\'': - inc(pos) - if buf[pos] == '\'': - inc(pos) - add(tok.literal, '\'') - else: - break - else: - add(tok.literal, buf[pos]) - inc(pos) - elif buf[pos] == '#': - inc(pos) - xi = 0 - case buf[pos] - of '$': - inc(pos) - xi = 0 - while true: - case buf[pos] - of '0'..'9': xi = (xi shl 4) or (ord(buf[pos]) - ord('0')) - of 'a'..'f': xi = (xi shl 4) or (ord(buf[pos]) - ord('a') + 10) - of 'A'..'F': xi = (xi shl 4) or (ord(buf[pos]) - ord('A') + 10) - else: break - inc(pos) - of '0'..'9': - xi = 0 - while buf[pos] in {'0'..'9'}: - xi = (xi * 10) + (ord(buf[pos]) - ord('0')) - inc(pos) - else: lexMessage(L, errInvalidCharacterConstant) - if (xi <= 255): add(tok.literal, Chr(xi)) - else: lexMessage(L, errInvalidCharacterConstant) - else: - break - tok.xkind = pxStrLit - L.bufpos = pos - -proc getSymbol(L: var TLexer, tok: var TToken) = - var h: THash = 0 - var pos = L.bufpos - var buf = L.buf - while true: - var c = buf[pos] - case c - of 'a'..'z', '0'..'9', '\x80'..'\xFF': - h = h +% Ord(c) - h = h +% h shl 10 - h = h xor (h shr 6) - of 'A'..'Z': - c = chr(ord(c) + (ord('a') - ord('A'))) # toLower() - h = h +% Ord(c) - h = h +% h shl 10 - h = h xor (h shr 6) - of '_': nil - else: break - Inc(pos) - h = h +% h shl 3 - h = h xor (h shr 11) - h = h +% h shl 15 - tok.ident = getIdent(addr(L.buf[L.bufpos]), pos - L.bufpos, h) - L.bufpos = pos - setKeyword(L, tok) - -proc scanLineComment(L: var TLexer, tok: var TToken) = - var pos = L.bufpos - var buf = L.buf - # a comment ends if the next line does not start with the // on the same - # column after only whitespace - tok.xkind = pxComment - var col = getColNumber(L, pos) - while true: - inc(pos, 2) # skip // - add(tok.literal, '#') - while not (buf[pos] in {CR, LF, lexbase.EndOfFile}): - add(tok.literal, buf[pos]) - inc(pos) - pos = handleCRLF(L, pos) - buf = L.buf - var indent = 0 - while buf[pos] == ' ': - inc(pos) - inc(indent) - if (col == indent) and (buf[pos] == '/') and (buf[pos + 1] == '/'): - tok.literal = tok.literal & "\n" - else: - break - L.bufpos = pos - -proc scanCurlyComment(L: var TLexer, tok: var TToken) = - var pos = L.bufpos - var buf = L.buf - tok.literal = "#" - tok.xkind = pxComment - while true: - case buf[pos] - of CR, LF: - pos = HandleCRLF(L, pos) - buf = L.buf - add(tok.literal, "\n#") - of '}': - inc(pos) - break - of lexbase.EndOfFile: lexMessage(L, errTokenExpected, "}") - else: - add(tok.literal, buf[pos]) - inc(pos) - L.bufpos = pos - -proc scanStarComment(L: var TLexer, tok: var TToken) = - var pos = L.bufpos - var buf = L.buf - tok.literal = "#" - tok.xkind = pxComment - while true: - case buf[pos] - of CR, LF: - pos = HandleCRLF(L, pos) - buf = L.buf - add(tok.literal, "\n#") - of '*': - inc(pos) - if buf[pos] == ')': - inc(pos) - break - else: - add(tok.literal, '*') - of lexbase.EndOfFile: - lexMessage(L, errTokenExpected, "*)") - else: - add(tok.literal, buf[pos]) - inc(pos) - L.bufpos = pos - -proc skip(L: var TLexer, tok: var TToken) = - var pos = L.bufpos - var buf = L.buf - while true: - case buf[pos] - of ' ', Tabulator: - Inc(pos) # newline is special: - of CR, LF: - pos = HandleCRLF(L, pos) - buf = L.buf - else: - break # EndOfFile also leaves the loop - L.bufpos = pos - -proc getTok(L: var TLexer, tok: var TToken) = - tok.xkind = pxInvalid - fillToken(tok) - skip(L, tok) - var c = L.buf[L.bufpos] - if c in SymStartChars: - getSymbol(L, tok) - elif c in {'0'..'9'}: - getNumber10(L, tok) - else: - case c - of ';': - tok.xkind = pxSemicolon - Inc(L.bufpos) - of '/': - if L.buf[L.bufpos + 1] == '/': - scanLineComment(L, tok) - else: - tok.xkind = pxSlash - inc(L.bufpos) - of ',': - tok.xkind = pxComma - Inc(L.bufpos) - of '(': - Inc(L.bufpos) - if (L.buf[L.bufPos] == '*'): - if (L.buf[L.bufPos + 1] == '$'): - Inc(L.bufpos, 2) - skip(L, tok) - getSymbol(L, tok) - tok.xkind = pxStarDirLe - else: - inc(L.bufpos) - scanStarComment(L, tok) - else: - tok.xkind = pxParLe - of '*': - inc(L.bufpos) - if L.buf[L.bufpos] == ')': - inc(L.bufpos) - tok.xkind = pxStarDirRi - else: - tok.xkind = pxStar - of ')': - tok.xkind = pxParRi - Inc(L.bufpos) - of '[': - Inc(L.bufpos) - tok.xkind = pxBracketLe - of ']': - Inc(L.bufpos) - tok.xkind = pxBracketRi - of '.': - inc(L.bufpos) - if L.buf[L.bufpos] == '.': - tok.xkind = pxDotDot - inc(L.bufpos) - else: - tok.xkind = pxDot - of '{': - Inc(L.bufpos) - case L.buf[L.bufpos] - of '$': - Inc(L.bufpos) - skip(L, tok) - getSymbol(L, tok) - tok.xkind = pxCurlyDirLe - of '&': - Inc(L.bufpos) - tok.xkind = pxAmp - of '%': - Inc(L.bufpos) - tok.xkind = pxPer - of '@': - Inc(L.bufpos) - tok.xkind = pxCommand - else: scanCurlyComment(L, tok) - of '+': - tok.xkind = pxPlus - inc(L.bufpos) - of '-': - tok.xkind = pxMinus - inc(L.bufpos) - of ':': - inc(L.bufpos) - if L.buf[L.bufpos] == '=': - inc(L.bufpos) - tok.xkind = pxAsgn - else: - tok.xkind = pxColon - of '<': - inc(L.bufpos) - if L.buf[L.bufpos] == '>': - inc(L.bufpos) - tok.xkind = pxNeq - elif L.buf[L.bufpos] == '=': - inc(L.bufpos) - tok.xkind = pxLe - else: - tok.xkind = pxLt - of '>': - inc(L.bufpos) - if L.buf[L.bufpos] == '=': - inc(L.bufpos) - tok.xkind = pxGe - else: - tok.xkind = pxGt - of '=': - tok.xkind = pxEquals - inc(L.bufpos) - of '@': - tok.xkind = pxAt - inc(L.bufpos) - of '^': - tok.xkind = pxHat - inc(L.bufpos) - of '}': - tok.xkind = pxCurlyDirRi - Inc(L.bufpos) - of '\'', '#': - getString(L, tok) - of '$': - getNumber16(L, tok) - of '%': - getNumber2(L, tok) - of lexbase.EndOfFile: - tok.xkind = pxEof - else: - tok.literal = c & "" - tok.xkind = pxInvalid - lexMessage(L, errInvalidToken, c & " (\\" & $(ord(c)) & ')') - Inc(L.bufpos) |