summary refs log tree commit diff stats
path: root/rod/pas2nim/paslex.nim
diff options
context:
space:
mode:
Diffstat (limited to 'rod/pas2nim/paslex.nim')
-rwxr-xr-xrod/pas2nim/paslex.nim570
1 files changed, 0 insertions, 570 deletions
diff --git a/rod/pas2nim/paslex.nim b/rod/pas2nim/paslex.nim
deleted file mode 100755
index ed554bdc2..000000000
--- a/rod/pas2nim/paslex.nim
+++ /dev/null
@@ -1,570 +0,0 @@
-#
-#
-#      Pas2nim - Pascal to Nimrod source converter
-#        (c) Copyright 2010 Andreas Rumpf
-#
-#    See the file "copying.txt", included in this
-#    distribution, for details about the copyright.
-#
-
-# This module implements a FreePascal scanner. This is an adaption from
-# the scanner module.
-
-import 
-  nhashes, options, msgs, strutils, platform, idents, lexbase, llstream
-
-const 
-  MaxLineLength* = 80         # lines longer than this lead to a warning
-  numChars*: TCharSet = {'0'..'9', 'a'..'z', 'A'..'Z'} 
-  SymChars*: TCharSet = {'a'..'z', 'A'..'Z', '0'..'9', '\x80'..'\xFF'}
-  SymStartChars*: TCharSet = {'a'..'z', 'A'..'Z', '\x80'..'\xFF'}
-  OpChars*: TCharSet = {'+', '-', '*', '/', '<', '>', '!', '?', '^', '.', '|', 
-    '=', ':', '%', '&', '$', '@', '~', '\x80'..'\xFF'}
-
-# keywords are sorted!
-
-type
-  TTokKind* = enum 
-    pxInvalid, pxEof,         
-    pxAnd, pxArray, pxAs, pxAsm, pxBegin, pxCase, pxClass, pxConst, 
-    pxConstructor, pxDestructor, pxDiv, pxDo, pxDownto, pxElse, pxEnd, pxExcept, 
-    pxExports, pxFinalization, pxFinally, pxFor, pxFunction, pxGoto, pxIf, 
-    pxImplementation, pxIn, pxInherited, pxInitialization, pxInline, 
-    pxInterface, pxIs, pxLabel, pxLibrary, pxMod, pxNil, pxNot, pxObject, pxOf, 
-    pxOr, pxOut, pxPacked, pxProcedure, pxProgram, pxProperty, pxRaise, 
-    pxRecord, pxRepeat, pxResourcestring, pxSet, pxShl, pxShr, pxThen, 
-    pxThreadvar, pxTo, pxTry, pxType, pxUnit, pxUntil, pxUses, pxVar, pxWhile, 
-    pxWith, pxXor,
-    pxComment,                # ordinary comment
-    pxCommand,                # {@}
-    pxAmp,                    # {&}
-    pxPer,                    # {%}
-    pxStrLit, pxSymbol,       # a symbol
-    pxIntLit, pxInt64Lit, # long constant like 0x70fffffff or out of int range
-    pxFloatLit, pxParLe, pxParRi, pxBracketLe, pxBracketRi, pxComma, 
-    pxSemiColon, pxColon,     # operators
-    pxAsgn, pxEquals, pxDot, pxDotDot, pxHat, pxPlus, pxMinus, pxStar, pxSlash, 
-    pxLe, pxLt, pxGe, pxGt, pxNeq, pxAt, pxStarDirLe, pxStarDirRi, pxCurlyDirLe, 
-    pxCurlyDirRi
-  TTokKinds* = set[TTokKind]
-
-const 
-  Keywords = ["and", "array", "as", "asm", "begin", "case", "class", "const", 
-    "constructor", "destructor", "div", "do", "downto", "else", "end", "except", 
-    "exports", "finalization", "finally", "for", "function", "goto", "if", 
-    "implementation", "in", "inherited", "initialization", "inline", 
-    "interface", "is", "label", "library", "mod", "nil", "not", "object", "of", 
-    "or", "out", "packed", "procedure", "program", "property", "raise", 
-    "record", "repeat", "resourcestring", "set", "shl", "shr", "then", 
-    "threadvar", "to", "try", "type", "unit", "until", "uses", "var", "while", 
-    "with", "xor"]
-
-  firstKeyword = pxAnd
-  lastKeyword = pxXor
-
-type
-  TNumericalBase* = enum base10, base2, base8, base16
-  TToken* = object
-    xkind*: TTokKind          # the type of the token
-    ident*: PIdent            # the parsed identifier
-    iNumber*: BiggestInt      # the parsed integer literal
-    fNumber*: BiggestFloat    # the parsed floating point literal
-    base*: TNumericalBase     # the numerical base; only valid for int
-                              # or float literals
-    literal*: string          # the parsed (string) literal
-  
-  TLexer* = object of TBaseLexer
-    filename*: string
-  
-
-proc getTok*(L: var TLexer, tok: var TToken)
-proc PrintTok*(tok: TToken)
-proc `$`*(tok: TToken): string
-# implementation
-
-var
-  dummyIdent: PIdent
-  gLinesCompiled: int
-
-proc fillToken(L: var TToken) = 
-  L.xkind = pxInvalid
-  L.iNumber = 0
-  L.literal = ""
-  L.fNumber = 0.0
-  L.base = base10
-  L.ident = dummyIdent        # this prevents many bugs!
-  
-proc openLexer*(lex: var TLexer, filename: string, inputstream: PLLStream) = 
-  openBaseLexer(lex, inputstream)
-  lex.filename = filename
-
-proc closeLexer*(lex: var TLexer) = 
-  inc(gLinesCompiled, lex.LineNumber)
-  closeBaseLexer(lex)
-
-proc getColumn(L: TLexer): int = 
-  result = getColNumber(L, L.bufPos)
-
-proc getLineInfo*(L: TLexer): TLineInfo = 
-  result = newLineInfo(L.filename, L.linenumber, getColNumber(L, L.bufpos))
-
-proc lexMessage*(L: TLexer, msg: TMsgKind, arg = "") = 
-  msgs.GenericMessage(getLineInfo(L), msg, arg)
-
-proc lexMessagePos(L: var TLexer, msg: TMsgKind, pos: int, arg = "") = 
-  var info = newLineInfo(L.filename, L.linenumber, pos - L.lineStart)
-  msgs.GenericMessage(info, msg, arg)
-
-proc TokKindToStr*(k: TTokKind): string =
-  case k
-  of pxEof: result = "[EOF]"
-  of firstKeyword..lastKeyword:
-    result = keywords[ord(k)-ord(firstKeyword)]
-  of pxInvalid, pxComment, pxStrLit: result = "string literal"
-  of pxCommand: result = "{@"
-  of pxAmp: result = "{&"
-  of pxPer: result = "{%"
-  of pxSymbol: result = "identifier"
-  of pxIntLit, pxInt64Lit: result = "integer literal"
-  of pxFloatLit: result = "floating point literal"
-  of pxParLe: result = "("
-  of pxParRi: result = ")"
-  of pxBracketLe: result = "["
-  of pxBracketRi: result = "]"
-  of pxComma: result = ","
-  of pxSemiColon: result = ";"
-  of pxColon: result = ":"
-  of pxAsgn: result = ":="
-  of pxEquals: result = "="
-  of pxDot: result = "."
-  of pxDotDot: result = ".."
-  of pxHat: result = "^"
-  of pxPlus: result = "+"
-  of pxMinus: result = "-"
-  of pxStar: result = "*"
-  of pxSlash: result = "/"
-  of pxLe: result = "<="
-  of pxLt: result = "<"
-  of pxGe: result = ">="
-  of pxGt: result = ">"
-  of pxNeq: result = "<>"
-  of pxAt: result = "@"
-  of pxStarDirLe: result = "(*$"
-  of pxStarDirRi: result = "*)"
-  of pxCurlyDirLe: result = "{$"
-  of pxCurlyDirRi: result = "}"
-
-proc `$`(tok: TToken): string = 
-  case tok.xkind
-  of pxInvalid, pxComment, pxStrLit: result = tok.literal
-  of pxSymbol: result = tok.ident.s
-  of pxIntLit, pxInt64Lit: result = $tok.iNumber
-  of pxFloatLit: result = $tok.fNumber
-  else: result = TokKindToStr(tok.xkind)
-  
-proc PrintTok(tok: TToken) = 
-  writeln(stdout, $tok)
-
-proc setKeyword(L: var TLexer, tok: var TToken) = 
-  var x = binaryStrSearch(keywords, toLower(tok.ident.s))
-  if x < 0: tok.xkind = pxSymbol
-  else: tok.xKind = TTokKind(x + ord(firstKeyword))
-  
-proc matchUnderscoreChars(L: var TLexer, tok: var TToken, chars: TCharSet) = 
-  # matches ([chars]_)*
-  var pos = L.bufpos              # use registers for pos, buf
-  var buf = L.buf
-  while true: 
-    if buf[pos] in chars: 
-      add(tok.literal, buf[pos])
-      Inc(pos)
-    else: 
-      break 
-    if buf[pos] == '_': 
-      add(tok.literal, '_')
-      Inc(pos)
-  L.bufPos = pos
-
-proc isFloatLiteral(s: string): bool = 
-  for i in countup(0, len(s)-1): 
-    if s[i] in {'.', 'e', 'E'}: 
-      return true
-
-proc getNumber2(L: var TLexer, tok: var TToken) = 
-  var pos = L.bufpos + 1 # skip %
-  if not (L.buf[pos] in {'0'..'1'}): 
-    # BUGFIX for %date%
-    tok.xkind = pxInvalid
-    add(tok.literal, '%')
-    inc(L.bufpos)
-    return 
-  tok.base = base2
-  var xi: biggestInt = 0
-  var bits = 0
-  while true: 
-    case L.buf[pos]
-    of 'A'..'Z', 'a'..'z', '2'..'9', '.': 
-      lexMessage(L, errInvalidNumber)
-      inc(pos)
-    of '_': 
-      inc(pos)
-    of '0', '1': 
-      xi = `shl`(xi, 1) or (ord(L.buf[pos]) - ord('0'))
-      inc(pos)
-      inc(bits)
-    else: break 
-  tok.iNumber = xi
-  if (bits > 32): tok.xkind = pxInt64Lit
-  else: tok.xkind = pxIntLit
-  L.bufpos = pos
-
-proc getNumber16(L: var TLexer, tok: var TToken) = 
-  var pos = L.bufpos + 1          # skip $
-  tok.base = base16
-  var xi: biggestInt = 0
-  var bits = 0
-  while true: 
-    case L.buf[pos]
-    of 'G'..'Z', 'g'..'z', '.': 
-      lexMessage(L, errInvalidNumber)
-      inc(pos)
-    of '_': inc(pos)
-    of '0'..'9': 
-      xi = `shl`(xi, 4) or (ord(L.buf[pos]) - ord('0'))
-      inc(pos)
-      inc(bits, 4)
-    of 'a'..'f': 
-      xi = `shl`(xi, 4) or (ord(L.buf[pos]) - ord('a') + 10)
-      inc(pos)
-      inc(bits, 4)
-    of 'A'..'F': 
-      xi = `shl`(xi, 4) or (ord(L.buf[pos]) - ord('A') + 10)
-      inc(pos)
-      inc(bits, 4)
-    else: break 
-  tok.iNumber = xi
-  if (bits > 32): 
-    tok.xkind = pxInt64Lit
-  else: 
-    tok.xkind = pxIntLit
-  L.bufpos = pos
-
-proc getNumber10(L: var TLexer, tok: var TToken) = 
-  tok.base = base10
-  matchUnderscoreChars(L, tok, {'0'..'9'})
-  if (L.buf[L.bufpos] == '.') and (L.buf[L.bufpos + 1] in {'0'..'9'}): 
-    add(tok.literal, '.')
-    inc(L.bufpos)
-    matchUnderscoreChars(L, tok, {'e', 'E', '+', '-', '0'..'9'})
-  try: 
-    if isFloatLiteral(tok.literal): 
-      tok.fnumber = parseFloat(tok.literal)
-      tok.xkind = pxFloatLit
-    else: 
-      tok.iNumber = ParseInt(tok.literal)
-      if (tok.iNumber < low(int32)) or (tok.iNumber > high(int32)): 
-        tok.xkind = pxInt64Lit
-      else: 
-        tok.xkind = pxIntLit
-  except EInvalidValue: 
-    lexMessage(L, errInvalidNumber, tok.literal)
-  except EOverflow: 
-    lexMessage(L, errNumberOutOfRange, tok.literal)
-  
-proc HandleCRLF(L: var TLexer, pos: int): int = 
-  case L.buf[pos]
-  of CR: result = lexbase.HandleCR(L, pos)
-  of LF: result = lexbase.HandleLF(L, pos)
-  else: result = pos
-  
-proc getString(L: var TLexer, tok: var TToken) = 
-  var xi: int
-  var pos = L.bufPos
-  var buf = L.buf
-  while true: 
-    if buf[pos] == '\'': 
-      inc(pos)
-      while true: 
-        case buf[pos]
-        of CR, LF, lexbase.EndOfFile: 
-          lexMessage(L, errClosingQuoteExpected)
-          break 
-        of '\'': 
-          inc(pos)
-          if buf[pos] == '\'': 
-            inc(pos)
-            add(tok.literal, '\'')
-          else: 
-            break 
-        else: 
-          add(tok.literal, buf[pos])
-          inc(pos)
-    elif buf[pos] == '#': 
-      inc(pos)
-      xi = 0
-      case buf[pos]
-      of '$': 
-        inc(pos)
-        xi = 0
-        while true: 
-          case buf[pos]
-          of '0'..'9': xi = (xi shl 4) or (ord(buf[pos]) - ord('0'))
-          of 'a'..'f': xi = (xi shl 4) or (ord(buf[pos]) - ord('a') + 10)
-          of 'A'..'F': xi = (xi shl 4) or (ord(buf[pos]) - ord('A') + 10)
-          else: break 
-          inc(pos)
-      of '0'..'9': 
-        xi = 0
-        while buf[pos] in {'0'..'9'}: 
-          xi = (xi * 10) + (ord(buf[pos]) - ord('0'))
-          inc(pos)
-      else: lexMessage(L, errInvalidCharacterConstant)
-      if (xi <= 255): add(tok.literal, Chr(xi))
-      else: lexMessage(L, errInvalidCharacterConstant)
-    else: 
-      break 
-  tok.xkind = pxStrLit
-  L.bufpos = pos
-
-proc getSymbol(L: var TLexer, tok: var TToken) = 
-  var h: THash = 0
-  var pos = L.bufpos
-  var buf = L.buf
-  while true: 
-    var c = buf[pos]
-    case c
-    of 'a'..'z', '0'..'9', '\x80'..'\xFF': 
-      h = h +% Ord(c)
-      h = h +% h shl 10
-      h = h xor (h shr 6)
-    of 'A'..'Z': 
-      c = chr(ord(c) + (ord('a') - ord('A'))) # toLower()
-      h = h +% Ord(c)
-      h = h +% h shl 10
-      h = h xor (h shr 6)
-    of '_': nil
-    else: break 
-    Inc(pos)
-  h = h +% h shl 3
-  h = h xor (h shr 11)
-  h = h +% h shl 15
-  tok.ident = getIdent(addr(L.buf[L.bufpos]), pos - L.bufpos, h)
-  L.bufpos = pos
-  setKeyword(L, tok)
-
-proc scanLineComment(L: var TLexer, tok: var TToken) = 
-  var pos = L.bufpos
-  var buf = L.buf 
-  # a comment ends if the next line does not start with the // on the same
-  # column after only whitespace
-  tok.xkind = pxComment
-  var col = getColNumber(L, pos)
-  while true: 
-    inc(pos, 2)               # skip //
-    add(tok.literal, '#')
-    while not (buf[pos] in {CR, LF, lexbase.EndOfFile}): 
-      add(tok.literal, buf[pos])
-      inc(pos)
-    pos = handleCRLF(L, pos)
-    buf = L.buf
-    var indent = 0
-    while buf[pos] == ' ': 
-      inc(pos)
-      inc(indent)
-    if (col == indent) and (buf[pos] == '/') and (buf[pos + 1] == '/'): 
-      tok.literal = tok.literal & "\n"
-    else: 
-      break 
-  L.bufpos = pos
-
-proc scanCurlyComment(L: var TLexer, tok: var TToken) = 
-  var pos = L.bufpos
-  var buf = L.buf
-  tok.literal = "#"
-  tok.xkind = pxComment
-  while true: 
-    case buf[pos]
-    of CR, LF: 
-      pos = HandleCRLF(L, pos)
-      buf = L.buf
-      add(tok.literal, "\n#")
-    of '}': 
-      inc(pos)
-      break 
-    of lexbase.EndOfFile: lexMessage(L, errTokenExpected, "}")
-    else: 
-      add(tok.literal, buf[pos])
-      inc(pos)
-  L.bufpos = pos
-
-proc scanStarComment(L: var TLexer, tok: var TToken) = 
-  var pos = L.bufpos
-  var buf = L.buf
-  tok.literal = "#"
-  tok.xkind = pxComment
-  while true: 
-    case buf[pos]
-    of CR, LF: 
-      pos = HandleCRLF(L, pos)
-      buf = L.buf
-      add(tok.literal, "\n#")
-    of '*': 
-      inc(pos)
-      if buf[pos] == ')': 
-        inc(pos)
-        break 
-      else: 
-        add(tok.literal, '*')
-    of lexbase.EndOfFile: 
-      lexMessage(L, errTokenExpected, "*)")
-    else: 
-      add(tok.literal, buf[pos])
-      inc(pos)
-  L.bufpos = pos
-
-proc skip(L: var TLexer, tok: var TToken) = 
-  var pos = L.bufpos
-  var buf = L.buf
-  while true: 
-    case buf[pos]
-    of ' ', Tabulator: 
-      Inc(pos)                # newline is special:
-    of CR, LF: 
-      pos = HandleCRLF(L, pos)
-      buf = L.buf
-    else: 
-      break                   # EndOfFile also leaves the loop
-  L.bufpos = pos
-
-proc getTok(L: var TLexer, tok: var TToken) = 
-  tok.xkind = pxInvalid
-  fillToken(tok)
-  skip(L, tok)
-  var c = L.buf[L.bufpos]
-  if c in SymStartChars: 
-    getSymbol(L, tok)
-  elif c in {'0'..'9'}: 
-    getNumber10(L, tok)
-  else: 
-    case c
-    of ';': 
-      tok.xkind = pxSemicolon
-      Inc(L.bufpos)
-    of '/': 
-      if L.buf[L.bufpos + 1] == '/': 
-        scanLineComment(L, tok)
-      else: 
-        tok.xkind = pxSlash
-        inc(L.bufpos)
-    of ',': 
-      tok.xkind = pxComma
-      Inc(L.bufpos)
-    of '(': 
-      Inc(L.bufpos)
-      if (L.buf[L.bufPos] == '*'): 
-        if (L.buf[L.bufPos + 1] == '$'): 
-          Inc(L.bufpos, 2)
-          skip(L, tok)
-          getSymbol(L, tok)
-          tok.xkind = pxStarDirLe
-        else: 
-          inc(L.bufpos)
-          scanStarComment(L, tok)
-      else: 
-        tok.xkind = pxParLe
-    of '*': 
-      inc(L.bufpos)
-      if L.buf[L.bufpos] == ')': 
-        inc(L.bufpos)
-        tok.xkind = pxStarDirRi
-      else: 
-        tok.xkind = pxStar
-    of ')': 
-      tok.xkind = pxParRi
-      Inc(L.bufpos)
-    of '[': 
-      Inc(L.bufpos)
-      tok.xkind = pxBracketLe
-    of ']': 
-      Inc(L.bufpos)
-      tok.xkind = pxBracketRi
-    of '.': 
-      inc(L.bufpos)
-      if L.buf[L.bufpos] == '.': 
-        tok.xkind = pxDotDot
-        inc(L.bufpos)
-      else: 
-        tok.xkind = pxDot
-    of '{': 
-      Inc(L.bufpos)
-      case L.buf[L.bufpos]
-      of '$': 
-        Inc(L.bufpos)
-        skip(L, tok)
-        getSymbol(L, tok)
-        tok.xkind = pxCurlyDirLe
-      of '&': 
-        Inc(L.bufpos)
-        tok.xkind = pxAmp
-      of '%': 
-        Inc(L.bufpos)
-        tok.xkind = pxPer
-      of '@': 
-        Inc(L.bufpos)
-        tok.xkind = pxCommand
-      else: scanCurlyComment(L, tok)
-    of '+': 
-      tok.xkind = pxPlus
-      inc(L.bufpos)
-    of '-': 
-      tok.xkind = pxMinus
-      inc(L.bufpos)
-    of ':': 
-      inc(L.bufpos)
-      if L.buf[L.bufpos] == '=': 
-        inc(L.bufpos)
-        tok.xkind = pxAsgn
-      else: 
-        tok.xkind = pxColon
-    of '<': 
-      inc(L.bufpos)
-      if L.buf[L.bufpos] == '>': 
-        inc(L.bufpos)
-        tok.xkind = pxNeq
-      elif L.buf[L.bufpos] == '=': 
-        inc(L.bufpos)
-        tok.xkind = pxLe
-      else: 
-        tok.xkind = pxLt
-    of '>': 
-      inc(L.bufpos)
-      if L.buf[L.bufpos] == '=': 
-        inc(L.bufpos)
-        tok.xkind = pxGe
-      else: 
-        tok.xkind = pxGt
-    of '=': 
-      tok.xkind = pxEquals
-      inc(L.bufpos)
-    of '@': 
-      tok.xkind = pxAt
-      inc(L.bufpos)
-    of '^': 
-      tok.xkind = pxHat
-      inc(L.bufpos)
-    of '}': 
-      tok.xkind = pxCurlyDirRi
-      Inc(L.bufpos)
-    of '\'', '#': 
-      getString(L, tok)
-    of '$': 
-      getNumber16(L, tok)
-    of '%': 
-      getNumber2(L, tok)
-    of lexbase.EndOfFile: 
-      tok.xkind = pxEof
-    else: 
-      tok.literal = c & ""
-      tok.xkind = pxInvalid
-      lexMessage(L, errInvalidToken, c & " (\\" & $(ord(c)) & ')')
-      Inc(L.bufpos)