Diffstat (limited to 'compiler/lexer.nim')
-rw-r--r--  compiler/lexer.nim  139
1 file changed, 47 insertions(+), 92 deletions(-)
diff --git a/compiler/lexer.nim b/compiler/lexer.nim
index bf9bf5343..6660ff65c 100644
--- a/compiler/lexer.nim
+++ b/compiler/lexer.nim
@@ -1,7 +1,7 @@
 #
 #
 #           The Nimrod Compiler
-#        (c) Copyright 2012 Andreas Rumpf
+#        (c) Copyright 2013 Andreas Rumpf
 #
 #    See the file "copying.txt", included in this
 #    distribution, for details about the copyright.
@@ -58,8 +58,7 @@ type
     tkParDotLe, tkParDotRi,   # (. and .)
     tkComma, tkSemiColon,
     tkColon, tkColonColon, tkEquals, tkDot, tkDotDot,
-    tkOpr, tkComment, tkAccent, tkInd, tkSad, 
-    tkDed, # pseudo token types used by the source renderers:
+    tkOpr, tkComment, tkAccent,
     tkSpaces, tkInfixOpr, tkPrefixOpr, tkPostfixOpr,
     
   TTokTypes* = set[TTokType]
@@ -91,8 +90,8 @@ const
     ")", "[", "]", "{", "}", "[.", ".]", "{.", ".}", "(.", ".)",
     ",", ";",
     ":", "::", "=", ".", "..",
-    "tkOpr", "tkComment", "`", "[new indentation]", 
-    "[same indentation]", "[dedentation]", "tkSpaces", "tkInfixOpr", 
+    "tkOpr", "tkComment", "`",
+    "tkSpaces", "tkInfixOpr",
     "tkPrefixOpr", "tkPostfixOpr"]
 
 type 
@@ -102,7 +101,8 @@ type
     base2, base8, base16
   TToken* = object            # a Nimrod token
     tokType*: TTokType        # the type of the token
-    indent*: int              # the indentation; only valid if tokType = tkIndent
+    indent*: int              # the indentation; != -1 if the token has been
+                              # preceeded with indentation
     ident*: PIdent            # the parsed identifier
     iNumber*: BiggestInt      # the parsed integer literal
     fNumber*: BiggestFloat    # the parsed floating point literal
@@ -113,8 +113,6 @@ type
   
   TLexer* = object of TBaseLexer
     fileIdx*: int32
-    indentStack*: seq[int]    # the indentation stack
-    dedent*: int              # counter for DED token generation
     indentAhead*: int         # if > 0 an indendation has already been read
                               # this is needed because scanning comments
                               # needs so much look-ahead
@@ -122,9 +120,6 @@ type
 
 var gLinesCompiled*: int  # all lines that have been compiled
 
-proc pushInd*(L: var TLexer, indent: int)
-
-proc popInd*(L: var TLexer)
 proc isKeyword*(kind: TTokType): bool
 proc openLexer*(lex: var TLexer, fileidx: int32, inputstream: PLLStream)
 proc rawGetTok*(L: var TLexer, tok: var TToken)
@@ -154,31 +149,14 @@ proc isNimrodIdentifier*(s: string): bool =
       inc(i)
     result = true
 
-proc pushInd(L: var TLexer, indent: int) = 
-  var length = len(L.indentStack)
-  setlen(L.indentStack, length + 1)
-  if (indent > L.indentStack[length - 1]): 
-    L.indentstack[length] = indent
-  else: 
-    InternalError("pushInd")
-  
-proc popInd(L: var TLexer) = 
-  var length = len(L.indentStack)
-  setlen(L.indentStack, length - 1)
-
-proc findIdent(L: TLexer, indent: int): bool = 
-  for i in countdown(len(L.indentStack) - 1, 0): 
-    if L.indentStack[i] == indent: 
-      return true
-
 proc tokToStr*(tok: TToken): string = 
   case tok.tokType
   of tkIntLit..tkInt64Lit: result = $tok.iNumber
   of tkFloatLit..tkFloat64Lit: result = $tok.fNumber
   of tkInvalid, tkStrLit..tkCharLit, tkComment: result = tok.literal
-  of tkParLe..tkColon, tkEof, tkInd, tkSad, tkDed, tkAccent: 
+  of tkParLe..tkColon, tkEof, tkAccent: 
     result = tokTypeToStr[tok.tokType]
-  else: 
+  else:
     if tok.ident != nil:
       result = tok.ident.s
     else: 
@@ -216,7 +194,6 @@ proc fillToken(L: var TToken) =
   
 proc openLexer(lex: var TLexer, fileIdx: int32, inputstream: PLLStream) = 
   openBaseLexer(lex, inputstream)
-  lex.indentStack = @[0]
   lex.fileIdx = fileIdx
   lex.indentAhead = - 1
   inc(lex.Linenumber, inputstream.lineOffset) 
@@ -434,9 +411,10 @@ proc GetNumber(L: var TLexer): TToken =
           result.tokType = tkInt64Lit
         elif result.tokType != tkInt64Lit: 
           lexMessage(L, errInvalidNumber, result.literal)
-  except EInvalidValue: lexMessage(L, errInvalidNumber, result.literal)
-  except EOverflow: lexMessage(L, errNumberOutOfRange, result.literal)
-  except EOutOfRange: lexMessage(L, errNumberOutOfRange, result.literal)
+  except EInvalidValue:
+    lexMessage(L, errInvalidNumber, result.literal)
+  except EOverflow, EOutOfRange:
+    lexMessage(L, errNumberOutOfRange, result.literal)
   L.bufpos = endpos
 
 proc handleHexChar(L: var TLexer, xi: var int) = 
@@ -651,24 +629,6 @@ proc getOperator(L: var TLexer, tok: var TToken) =
     Inc(pos)
   endOperator(L, tok, pos, h)
 
-proc handleIndentation(L: var TLexer, tok: var TToken, indent: int) = 
-  tok.indent = indent
-  var i = high(L.indentStack)
-  if indent > L.indentStack[i]: 
-    tok.tokType = tkInd
-  elif indent == L.indentStack[i]: 
-    tok.tokType = tkSad
-  else: 
-    # check we have the indentation somewhere in the stack:
-    while (i >= 0) and (indent != L.indentStack[i]): 
-      dec(i)
-      inc(L.dedent)
-    dec(L.dedent)
-    tok.tokType = tkDed
-    if i < 0: 
-      tok.tokType = tkSad     # for the parser it is better as SAD
-      lexMessage(L, errInvalidIndentation)
-
 proc scanComment(L: var TLexer, tok: var TToken) = 
   var pos = L.bufpos
   var buf = L.buf 
@@ -705,53 +665,45 @@ proc scanComment(L: var TLexer, tok: var TToken) =
     else:
       if buf[pos] > ' ': 
         L.indentAhead = indent
-        inc(L.dedent)
-      break 
+      break
   L.bufpos = pos
 
-proc skip(L: var TLexer, tok: var TToken) = 
+proc skip(L: var TLexer, tok: var TToken) =
   var pos = L.bufpos
   var buf = L.buf
-  while true: 
+  while true:
     case buf[pos]
-    of ' ': 
+    of ' ':
       Inc(pos)
-    of Tabulator: 
+    of Tabulator:
       lexMessagePos(L, errTabulatorsAreNotAllowed, pos)
-      inc(pos)                # BUGFIX
-    of CR, LF: 
+      inc(pos)
+    of CR, LF:
       pos = HandleCRLF(L, pos)
       buf = L.buf
       var indent = 0
-      while buf[pos] == ' ': 
+      while buf[pos] == ' ':
         Inc(pos)
         Inc(indent)
-      if (buf[pos] > ' '): 
-        handleIndentation(L, tok, indent)
-        break 
-    else: 
+      if buf[pos] > ' ':
+        tok.indent = indent
+        break
+    else:
       break                   # EndOfFile also leaves the loop
   L.bufpos = pos
 
-proc rawGetTok(L: var TLexer, tok: var TToken) = 
+proc rawGetTok(L: var TLexer, tok: var TToken) =
   fillToken(tok)
-  if L.dedent > 0:
-    dec(L.dedent)
-    if L.indentAhead >= 0: 
-      handleIndentation(L, tok, L.indentAhead)
-      L.indentAhead = - 1
-    else:
-      tok.tokType = tkDed
-    return
+  if L.indentAhead >= 0:
+    tok.indent = L.indentAhead
+    L.indentAhead = -1
+  else:
+    tok.indent = -1
   skip(L, tok)
-  # got an documentation comment or tkIndent, return that:
-  if tok.toktype != tkInvalid: return
   var c = L.buf[L.bufpos]
-  if c in SymStartChars - {'r', 'R', 'l'}: 
+  if c in SymStartChars - {'r', 'R', 'l'}:
     getSymbol(L, tok)
-  elif c in {'0'..'9'}: 
-    tok = getNumber(L)
-  else: 
+  else:
     case c
     of '#': 
       scanComment(L, tok)
@@ -769,10 +721,10 @@ proc rawGetTok(L: var TLexer, tok: var TToken) =
     of 'l': 
       # if we parsed exactly one character and its a small L (l), this
       # is treated as a warning because it may be confused with the number 1
-      if not (L.buf[L.bufpos + 1] in (SymChars + {'_'})): 
+      if L.buf[L.bufpos+1] notin (SymChars + {'_'}):
         lexMessage(L, warnSmallLshouldNotBeUsed)
       getSymbol(L, tok)
-    of 'r', 'R': 
+    of 'r', 'R':
       if L.buf[L.bufPos + 1] == '\"': 
         Inc(L.bufPos)
         getString(L, tok, true)
@@ -780,7 +732,7 @@ proc rawGetTok(L: var TLexer, tok: var TToken) =
         getSymbol(L, tok)
     of '(': 
       Inc(L.bufpos)
-      if (L.buf[L.bufPos] == '.') and (L.buf[L.bufPos + 1] != '.'): 
+      if L.buf[L.bufPos] == '.' and L.buf[L.bufPos+1] != '.': 
         tok.toktype = tkParDotLe
         Inc(L.bufpos)
       else: 
@@ -790,29 +742,29 @@ proc rawGetTok(L: var TLexer, tok: var TToken) =
       Inc(L.bufpos)
     of '[': 
       Inc(L.bufpos)
-      if (L.buf[L.bufPos] == '.') and (L.buf[L.bufPos + 1] != '.'): 
+      if L.buf[L.bufPos] == '.' and L.buf[L.bufPos+1] != '.':
         tok.toktype = tkBracketDotLe
         Inc(L.bufpos)
-      else: 
+      else:
         tok.toktype = tkBracketLe
-    of ']': 
+    of ']':
       tok.toktype = tkBracketRi
       Inc(L.bufpos)
-    of '.': 
-      if L.buf[L.bufPos + 1] == ']': 
+    of '.':
+      if L.buf[L.bufPos+1] == ']': 
         tok.tokType = tkBracketDotRi
         Inc(L.bufpos, 2)
-      elif L.buf[L.bufPos + 1] == '}': 
+      elif L.buf[L.bufPos+1] == '}': 
         tok.tokType = tkCurlyDotRi
         Inc(L.bufpos, 2)
-      elif L.buf[L.bufPos + 1] == ')': 
+      elif L.buf[L.bufPos+1] == ')': 
         tok.tokType = tkParDotRi
         Inc(L.bufpos, 2)
       else: 
         getOperator(L, tok)
     of '{': 
       Inc(L.bufpos)
-      if (L.buf[L.bufPos] == '.') and (L.buf[L.bufPos+1] != '.'): 
+      if L.buf[L.bufPos] == '.' and L.buf[L.bufPos+1] != '.':
         tok.toktype = tkCurlyDotLe
         Inc(L.bufpos)
       else: 
@@ -838,13 +790,16 @@ proc rawGetTok(L: var TLexer, tok: var TToken) =
       tok.tokType = tkCharLit
       getCharacter(L, tok)
       tok.tokType = tkCharLit
+    of '0'..'9':
+      tok = getNumber(L)
     else:
       if c in OpChars: 
         getOperator(L, tok)
       elif c == nimlexbase.EndOfFile:
         tok.toktype = tkEof
+        tok.indent = 0
       else:
-        tok.literal = c & ""
+        tok.literal = $c
         tok.tokType = tkInvalid
         lexMessage(L, errInvalidToken, c & " (\\" & $(ord(c)) & ')')
         Inc(L.bufpos)
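
The net effect of the hunks above: the lexer no longer keeps an indentation stack or emits the tkInd, tkSad and tkDed token kinds; instead every TToken carries an indent field that is -1 for ordinary tokens and holds the leading-space count whenever the token is the first one after a line break, so indentation bookkeeping moves up into the parser. Below is a minimal sketch (not part of the commit) of how a caller of rawGetTok might observe the new field. It relies only on the procs visible in this diff plus llStreamOpen from the compiler's llstream module; the zero file index passed to openLexer and the dumpTokens driver are illustrative assumptions.

import lexer, llstream

proc dumpTokens(source: string) =
  var
    L: TLexer
    tok: TToken
  # fileIdx 0 is a placeholder; a real caller would look up a proper index via msgs
  openLexer(L, 0'i32, llStreamOpen(source))
  while true:
    rawGetTok(L, tok)
    if tok.tokType == tkEof: break
    if tok.indent >= 0:
      # first token on a new line: indent is the number of leading spaces
      echo tok.indent, " ", tokToStr(tok)
    else:
      echo "  ", tokToStr(tok)

dumpTokens("proc p =\n  echo 1\necho 2\n")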