summary refs log tree commit diff stats
path: root/compiler/lexer.nim
diff options
context:
space:
mode:
Diffstat (limited to 'compiler/lexer.nim')
-rw-r--r-- compiler/lexer.nim 94
1 files changed, 33 insertions, 61 deletions
diff --git a/compiler/lexer.nim b/compiler/lexer.nim
index 9c513034b..2769d757c 100644
--- a/compiler/lexer.nim
+++ b/compiler/lexer.nim
@@ -127,8 +127,9 @@ type
                               # this is needed because scanning comments
                               # needs so much look-ahead
     currLineIndent*: int
-    strongSpaces*: bool
+    strongSpaces*, allowTabs*: bool
     errorHandler*: TErrorHandler
+    cache*: IdentCache
 
 var gLinesCompiled*: int  # all lines that have been compiled
 
@@ -164,7 +165,6 @@ proc tokToStr*(tok: TToken): string =
     if tok.ident != nil:
       result = tok.ident.s
     else:
-      internalError("tokToStr")
       result = ""
 
 proc prettyTok*(tok: TToken): string =
@@ -175,8 +175,6 @@ proc printTok*(tok: TToken) =
   msgWriteln($tok.line & ":" & $tok.col & "\t" &
       TokTypeToStr[tok.tokType] & " " & tokToStr(tok))
 
-var dummyIdent: PIdent
-
 proc initToken*(L: var TToken) =
   L.tokType = tkInvalid
   L.iNumber = 0
@@ -185,7 +183,7 @@ proc initToken*(L: var TToken) =
   L.literal = ""
   L.fNumber = 0.0
   L.base = base10
-  L.ident = dummyIdent
+  L.ident = nil
 
 proc fillToken(L: var TToken) =
   L.tokType = tkInvalid
@@ -195,17 +193,20 @@ proc fillToken(L: var TToken) =
   setLen(L.literal, 0)
   L.fNumber = 0.0
   L.base = base10
-  L.ident = dummyIdent
+  L.ident = nil
 
-proc openLexer*(lex: var TLexer, fileIdx: int32, inputstream: PLLStream) =
+proc openLexer*(lex: var TLexer, fileIdx: int32, inputstream: PLLStream;
+                 cache: IdentCache) =
   openBaseLexer(lex, inputstream)
   lex.fileIdx = fileidx
   lex.indentAhead = - 1
   lex.currLineIndent = 0
   inc(lex.lineNumber, inputstream.lineOffset)
+  lex.cache = cache
 
-proc openLexer*(lex: var TLexer, filename: string, inputstream: PLLStream) =
-  openLexer(lex, filename.fileInfoIdx, inputstream)
+proc openLexer*(lex: var TLexer, filename: string, inputstream: PLLStream;
+                cache: IdentCache) =
+  openLexer(lex, filename.fileInfoIdx, inputstream, cache)
 
 proc closeLexer*(lex: var TLexer) =
   inc(gLinesCompiled, lex.lineNumber)
@@ -746,7 +747,7 @@ proc getSymbol(L: var TLexer, tok: var TToken) =
 
     else: break
   h = !$h
-  tok.ident = getIdent(addr(L.buf[L.bufpos]), pos - L.bufpos, h)
+  tok.ident = L.cache.getIdent(addr(L.buf[L.bufpos]), pos - L.bufpos, h)
   L.bufpos = pos
   if (tok.ident.id < ord(tokKeywordLow) - ord(tkSymbol)) or
       (tok.ident.id > ord(tokKeywordHigh) - ord(tkSymbol)):
@@ -757,7 +758,7 @@ proc getSymbol(L: var TLexer, tok: var TToken) =
 proc endOperator(L: var TLexer, tok: var TToken, pos: int,
                  hash: Hash) {.inline.} =
   var h = !$hash
-  tok.ident = getIdent(addr(L.buf[L.bufpos]), pos - L.bufpos, h)
+  tok.ident = L.cache.getIdent(addr(L.buf[L.bufpos]), pos - L.bufpos, h)
   if (tok.ident.id < oprLow) or (tok.ident.id > oprHigh): tok.tokType = tkOpr
   else: tok.tokType = TTokType(tok.ident.id - oprLow + ord(tkColon))
   L.bufpos = pos
@@ -847,34 +848,23 @@ proc scanComment(L: var TLexer, tok: var TToken) =
   tok.tokType = tkComment
   # iNumber contains the number of '\n' in the token
   tok.iNumber = 0
-  when not defined(nimfix):
-    assert buf[pos+1] == '#'
-    if buf[pos+2] == '[':
-      skipMultiLineComment(L, tok, pos+3, true)
-      return
-    inc(pos, 2)
+  assert buf[pos+1] == '#'
+  if buf[pos+2] == '[':
+    skipMultiLineComment(L, tok, pos+3, true)
+    return
+  inc(pos, 2)
 
   var toStrip = 0
   while buf[pos] == ' ':
     inc pos
     inc toStrip
 
-  when defined(nimfix):
-    var col = getColNumber(L, pos)
   while true:
     var lastBackslash = -1
     while buf[pos] notin {CR, LF, nimlexbase.EndOfFile}:
       if buf[pos] == '\\': lastBackslash = pos+1
       add(tok.literal, buf[pos])
       inc(pos)
-    when defined(nimfix):
-      if lastBackslash > 0:
-        # a backslash is a continuation character if only followed by spaces
-        # plus a newline:
-        while buf[lastBackslash] == ' ': inc(lastBackslash)
-        if buf[lastBackslash] notin {CR, LF, nimlexbase.EndOfFile}:
-          # false positive:
-          lastBackslash = -1
 
     pos = handleCRLF(L, pos)
     buf = L.buf
@@ -883,21 +873,13 @@ proc scanComment(L: var TLexer, tok: var TToken) =
       inc(pos)
       inc(indent)
 
-    when defined(nimfix):
-      template doContinue(): untyped =
-        buf[pos] == '#' and (col == indent or lastBackslash > 0)
-    else:
-      template doContinue(): untyped =
-        buf[pos] == '#' and buf[pos+1] == '#'
-    if doContinue():
+    if buf[pos] == '#' and buf[pos+1] == '#':
       tok.literal.add "\n"
-      when defined(nimfix): col = indent
-      else:
-        inc(pos, 2)
-        var c = toStrip
-        while buf[pos] == ' ' and c > 0:
-          inc pos
-          dec c
+      inc(pos, 2)
+      var c = toStrip
+      while buf[pos] == ' ' and c > 0:
+        inc pos
+        dec c
       inc tok.iNumber
     else:
       if buf[pos] > ' ':
@@ -915,7 +897,7 @@ proc skip(L: var TLexer, tok: var TToken) =
       inc(pos)
       inc(tok.strongSpaceA)
     of '\t':
-      lexMessagePos(L, errTabulatorsAreNotAllowed, pos)
+      if not L.allowTabs: lexMessagePos(L, errTabulatorsAreNotAllowed, pos)
       inc(pos)
     of CR, LF:
       pos = handleCRLF(L, pos)
@@ -932,27 +914,19 @@ proc skip(L: var TLexer, tok: var TToken) =
         else:
           break
       tok.strongSpaceA = 0
-      when defined(nimfix):
-        template doBreak(): untyped = buf[pos] > ' '
-      else:
-        template doBreak(): untyped =
-          buf[pos] > ' ' and (buf[pos] != '#' or buf[pos+1] == '#')
-      if doBreak():
+      if buf[pos] > ' ' and (buf[pos] != '#' or buf[pos+1] == '#'):
         tok.indent = indent
         L.currLineIndent = indent
         break
     of '#':
-      when defined(nimfix):
-        break
+      # do not skip documentation comment:
+      if buf[pos+1] == '#': break
+      if buf[pos+1] == '[':
+        skipMultiLineComment(L, tok, pos+2, false)
+        pos = L.bufpos
+        buf = L.buf
       else:
-        # do not skip documentation comment:
-        if buf[pos+1] == '#': break
-        if buf[pos+1] == '[':
-          skipMultiLineComment(L, tok, pos+2, false)
-          pos = L.bufpos
-          buf = L.buf
-        else:
-          while buf[pos] notin {CR, LF, nimlexbase.EndOfFile}: inc(pos)
+        while buf[pos] notin {CR, LF, nimlexbase.EndOfFile}: inc(pos)
     else:
       break                   # EndOfFile also leaves the loop
   L.bufpos = pos
@@ -1051,7 +1025,7 @@ proc rawGetTok*(L: var TLexer, tok: var TToken) =
       if L.buf[L.bufpos] notin SymChars+{'_'} and not
           isMagicIdentSeparatorRune(L.buf, L.bufpos):
         tok.tokType = tkSymbol
-        tok.ident = getIdent("_")
+        tok.ident = L.cache.getIdent("_")
       else:
         tok.literal = $c
         tok.tokType = tkInvalid
@@ -1084,5 +1058,3 @@ proc rawGetTok*(L: var TLexer, tok: var TToken) =
         tok.tokType = tkInvalid
         lexMessage(L, errInvalidToken, c & " (\\" & $(ord(c)) & ')')
         inc(L.bufpos)
-
-dummyIdent = getIdent("")