summary refs log tree commit diff stats
path: root/compiler/lexer.nim
diff options
context:
space:
mode:
Diffstat (limited to 'compiler/lexer.nim')
-rw-r--r--compiler/lexer.nim83
1 files changed, 43 insertions, 40 deletions
diff --git a/compiler/lexer.nim b/compiler/lexer.nim
index a65587390..591561987 100644
--- a/compiler/lexer.nim
+++ b/compiler/lexer.nim
@@ -17,7 +17,7 @@
 
 import
   hashes, options, msgs, strutils, platform, idents, nimlexbase, llstream,
-  wordrecg
+  wordrecg, configuration
 
 const
   MaxLineLength* = 80         # lines longer than this lead to a warning
@@ -131,7 +131,7 @@ type
                                 # like 0b01 or  r"\L" are unaffected
       commentOffsetA*, commentOffsetB*: int
 
-  TErrorHandler* = proc (info: TLineInfo; msg: TMsgKind; arg: string)
+  TErrorHandler* = proc (conf: ConfigRef; info: TLineInfo; msg: TMsgKind; arg: string)
   TLexer* = object of TBaseLexer
     fileIdx*: FileIndex
     indentAhead*: int         # if > 0 an indendation has already been read
@@ -190,8 +190,8 @@ proc prettyTok*(tok: TToken): string =
   if isKeyword(tok.tokType): result = "keyword " & tok.ident.s
   else: result = tokToStr(tok)
 
-proc printTok*(tok: TToken) =
-  msgWriteln($tok.line & ":" & $tok.col & "\t" &
+proc printTok*(conf: ConfigRef; tok: TToken) =
+  msgWriteln(conf, $tok.line & ":" & $tok.col & "\t" &
       TokTypeToStr[tok.tokType] & " " & tokToStr(tok))
 
 proc initToken*(L: var TToken) =
@@ -234,7 +234,7 @@ proc openLexer*(lex: var TLexer, fileIdx: FileIndex, inputstream: PLLStream;
 
 proc openLexer*(lex: var TLexer, filename: string, inputstream: PLLStream;
                 cache: IdentCache; config: ConfigRef) =
-  openLexer(lex, filename.fileInfoIdx, inputstream, cache, config)
+  openLexer(lex, fileInfoIdx(config, filename), inputstream, cache, config)
 
 proc closeLexer*(lex: var TLexer) =
   if lex.config != nil:
@@ -246,9 +246,9 @@ proc getLineInfo(L: TLexer): TLineInfo =
 
 proc dispMessage(L: TLexer; info: TLineInfo; msg: TMsgKind; arg: string) =
   if L.errorHandler.isNil:
-    msgs.message(info, msg, arg)
+    msgs.message(L.config, info, msg, arg)
   else:
-    L.errorHandler(info, msg, arg)
+    L.errorHandler(L.config, info, msg, arg)
 
 proc lexMessage*(L: TLexer, msg: TMsgKind, arg = "") =
   L.dispMessage(getLineInfo(L), msg, arg)
@@ -274,7 +274,7 @@ template tokenEnd(tok, pos) {.dirty.} =
   when defined(nimsuggest):
     let colB = getColNumber(L, pos)+1
     if L.fileIdx == gTrackPos.fileIndex and gTrackPos.col in colA..colB and
-        L.lineNumber == gTrackPos.line.int and gIdeCmd in {ideSug, ideCon}:
+        L.lineNumber == gTrackPos.line.int and L.config.ideCmd in {ideSug, ideCon}:
       L.cursor = CursorPosition.InToken
       gTrackPos.col = colA.int16
     colA = 0
@@ -285,7 +285,7 @@ template tokenEndIgnore(tok, pos) =
   when defined(nimsuggest):
     let colB = getColNumber(L, pos)
     if L.fileIdx == gTrackPos.fileIndex and gTrackPos.col in colA..colB and
-        L.lineNumber == gTrackPos.line.int and gIdeCmd in {ideSug, ideCon}:
+        L.lineNumber == gTrackPos.line.int and L.config.ideCmd in {ideSug, ideCon}:
       gTrackPos.fileIndex = trackPosInvalidFileIdx
       gTrackPos.line = 0'u16
     colA = 0
@@ -299,7 +299,7 @@ template tokenEndPrevious(tok, pos) =
     # the cursor in a string literal or comment:
     let colB = getColNumber(L, pos)
     if L.fileIdx == gTrackPos.fileIndex and gTrackPos.col in colA..colB and
-        L.lineNumber == gTrackPos.line.int and gIdeCmd in {ideSug, ideCon}:
+        L.lineNumber == gTrackPos.line.int and L.config.ideCmd in {ideSug, ideCon}:
       L.cursor = CursorPosition.BeforeToken
       gTrackPos = L.previousToken
       gTrackPosAttached = true
@@ -341,7 +341,8 @@ proc getNumber(L: var TLexer, result: var TToken) =
         break
       if buf[pos] == '_':
         if buf[pos+1] notin chars:
-          lexMessage(L, errInvalidToken, "_")
+          lexMessage(L, errGenerated,
+            "only single underscores may occur in a token: '__' is invalid")
           break
         add(tok.literal, '_')
         inc(pos)
@@ -355,7 +356,7 @@ proc getNumber(L: var TLexer, result: var TToken) =
       inc(pos)
     L.bufpos = pos
 
-  proc lexMessageLitNum(L: var TLexer, msg: TMsgKind, startpos: int) =
+  proc lexMessageLitNum(L: var TLexer, msg: string, startpos: int) =
     # Used to get slightly human friendlier err messages.
     # Note: the erroneous 'O' char in the character set is intentional
     const literalishChars = {'A'..'F', 'a'..'f', '0'..'9', 'X', 'x', 'o', 'O',
@@ -376,7 +377,7 @@ proc getNumber(L: var TLexer, result: var TToken) =
       add(t.literal, L.buf[L.bufpos])
       matchChars(L, t, {'0'..'9'})
     L.bufpos = msgPos
-    lexMessage(L, msg, t.literal)
+    lexMessage(L, errGenerated, msg % t.literal)
 
   var
     startpos, endpos: int
@@ -398,7 +399,7 @@ proc getNumber(L: var TLexer, result: var TToken) =
     eatChar(L, result, '0')
     case L.buf[L.bufpos]
     of 'O':
-      lexMessageLitNum(L, errInvalidNumberOctalCode, startpos)
+      lexMessageLitNum(L, "$1 is not a valid number; did you mean octal? Then use one of '0o', '0c' or '0C'.", startpos)
     of 'x', 'X':
       eatChar(L, result, 'x')
       matchUnderscoreChars(L, result, {'0'..'9', 'a'..'f', 'A'..'F'})
@@ -409,7 +410,7 @@ proc getNumber(L: var TLexer, result: var TToken) =
       eatChar(L, result, 'b')
       matchUnderscoreChars(L, result, {'0'..'1'})
     else:
-      internalError(getLineInfo(L), "getNumber")
+      internalError(L.config, getLineInfo(L), "getNumber")
   else:
     matchUnderscoreChars(L, result, {'0'..'9'})
     if (L.buf[L.bufpos] == '.') and (L.buf[L.bufpos + 1] in {'0'..'9'}):
@@ -464,7 +465,7 @@ proc getNumber(L: var TLexer, result: var TToken) =
         result.tokType = tkInt8Lit
         inc(postPos)
       else:
-        lexMessageLitNum(L, errInvalidNumber, startpos)
+        lexMessageLitNum(L, "invalid number: '$1'", startpos)
     of 'u', 'U':
       inc(postPos)
       if (L.buf[postPos] == '6') and (L.buf[postPos + 1] == '4'):
@@ -482,12 +483,12 @@ proc getNumber(L: var TLexer, result: var TToken) =
       else:
         result.tokType = tkUIntLit
     else:
-      lexMessageLitNum(L, errInvalidNumber, startpos)
+      lexMessageLitNum(L, "invalid number: '$1'", startpos)
 
   # Is there still a literalish char awaiting? Then it's an error!
   if  L.buf[postPos] in literalishChars or
      (L.buf[postPos] == '.' and L.buf[postPos + 1] in {'0'..'9'}):
-    lexMessageLitNum(L, errInvalidNumber, startpos)
+    lexMessageLitNum(L, "invalid number: '$1'", startpos)
 
   # Third stage, extract actual number
   L.bufpos = startpos            # restore position
@@ -528,7 +529,7 @@ proc getNumber(L: var TLexer, result: var TToken) =
           else:
             break
       else:
-        internalError(getLineInfo(L), "getNumber")
+        internalError(L.config, getLineInfo(L), "getNumber")
 
       case result.tokType
       of tkIntLit, tkInt64Lit: result.iNumber = xi
@@ -545,7 +546,7 @@ proc getNumber(L: var TLexer, result: var TToken) =
         # XXX: Test this on big endian machine!
       of tkFloat64Lit, tkFloatLit:
         result.fNumber = (cast[PFloat64](addr(xi)))[]
-      else: internalError(getLineInfo(L), "getNumber")
+      else: internalError(L.config, getLineInfo(L), "getNumber")
 
       # Bounds checks. Non decimal literals are allowed to overflow the range of
       # the datatype as long as their pattern don't overflow _bitwise_, hence
@@ -561,7 +562,7 @@ proc getNumber(L: var TLexer, result: var TToken) =
 
         if outOfRange:
           #echo "out of range num: ", result.iNumber, " vs ", xi
-          lexMessageLitNum(L, errNumberOutOfRange, startpos)
+          lexMessageLitNum(L, "number out of range: '$1'", startpos)
 
     else:
       case result.tokType
@@ -590,7 +591,7 @@ proc getNumber(L: var TLexer, result: var TToken) =
                         result.iNumber > BiggestInt(uint32.high))
         else: false
 
-      if outOfRange: lexMessageLitNum(L, errNumberOutOfRange, startpos)
+      if outOfRange: lexMessageLitNum(L, "number out of range: '$1'", startpos)
 
     # Promote int literal to int64? Not always necessary, but more consistent
     if result.tokType == tkIntLit:
@@ -598,9 +599,9 @@ proc getNumber(L: var TLexer, result: var TToken) =
         result.tokType = tkInt64Lit
 
   except ValueError:
-    lexMessageLitNum(L, errInvalidNumber, startpos)
+    lexMessageLitNum(L, "invalid number: '$1'", startpos)
   except OverflowError, RangeError:
-    lexMessageLitNum(L, errNumberOutOfRange, startpos)
+    lexMessageLitNum(L, "number out of range: '$1'", startpos)
   tokenEnd(result, postPos-1)
   L.bufpos = postPos
 
@@ -626,8 +627,9 @@ proc getEscapedChar(L: var TLexer, tok: var TToken) =
   inc(L.bufpos)               # skip '\'
   case L.buf[L.bufpos]
   of 'n', 'N':
-    if gOldNewlines:
-      if tok.tokType == tkCharLit: lexMessage(L, errNnotAllowedInCharacter)
+    if L.config.oldNewlines:
+      if tok.tokType == tkCharLit:
+        lexMessage(L, errGenerated, "\\n not allowed in character literal")
       add(tok.literal, tnl)
     else:
       add(tok.literal, '\L')
@@ -696,8 +698,8 @@ proc getEscapedChar(L: var TLexer, tok: var TToken) =
     var xi = 0
     handleDecChars(L, xi)
     if (xi <= 255): add(tok.literal, chr(xi))
-    else: lexMessage(L, errInvalidCharacterConstant)
-  else: lexMessage(L, errInvalidCharacterConstant)
+    else: lexMessage(L, errGenerated, "invalid character constant")
+  else: lexMessage(L, errGenerated, "invalid character constant")
 
 proc newString(s: cstring, len: int): string =
   ## XXX, how come there is no support for this?
@@ -712,7 +714,7 @@ proc handleCRLF(L: var TLexer, pos: int): int =
     if col > MaxLineLength:
       lexMessagePos(L, hintLineTooLong, pos)
 
-    if optEmbedOrigSrc in gGlobalOptions:
+    if optEmbedOrigSrc in L.config.globalOptions:
       let lineStart = cast[ByteAddress](L.buf) + L.lineStart
       let line = newString(cast[cstring](lineStart), col)
       addSourceLine(L.fileIdx, line)
@@ -761,7 +763,7 @@ proc getString(L: var TLexer, tok: var TToken, rawMode: bool) =
         tokenEndIgnore(tok, pos)
         var line2 = L.lineNumber
         L.lineNumber = line
-        lexMessagePos(L, errClosingTripleQuoteExpected, L.lineStart)
+        lexMessagePos(L, errGenerated, L.lineStart, "closing \"\"\" expected, but end of file reached")
         L.lineNumber = line2
         L.bufpos = pos
         break
@@ -784,7 +786,7 @@ proc getString(L: var TLexer, tok: var TToken, rawMode: bool) =
           break
       elif c in {CR, LF, nimlexbase.EndOfFile}:
         tokenEndIgnore(tok, pos)
-        lexMessage(L, errClosingQuoteExpected)
+        lexMessage(L, errGenerated, "closing \" expected")
         break
       elif (c == '\\') and not rawMode:
         L.bufpos = pos
@@ -800,12 +802,13 @@ proc getCharacter(L: var TLexer, tok: var TToken) =
   inc(L.bufpos)               # skip '
   var c = L.buf[L.bufpos]
   case c
-  of '\0'..pred(' '), '\'': lexMessage(L, errInvalidCharacterConstant)
+  of '\0'..pred(' '), '\'': lexMessage(L, errGenerated, "invalid character literal")
   of '\\': getEscapedChar(L, tok)
   else:
     tok.literal = $c
     inc(L.bufpos)
-  if L.buf[L.bufpos] != '\'': lexMessage(L, errMissingFinalQuote)
+  if L.buf[L.bufpos] != '\'':
+    lexMessage(L, errGenerated, "missing closing ' for character literal")
   tokenEndIgnore(tok, L.bufpos)
   inc(L.bufpos)               # skip '
 
@@ -826,7 +829,7 @@ proc getSymbol(L: var TLexer, tok: var TToken) =
       inc(pos)
     of '_':
       if buf[pos+1] notin SymChars:
-        lexMessage(L, errInvalidToken, "_")
+        lexMessage(L, errGenerated, "invalid token: trailing underscore")
         break
       inc(pos)
     else: break
@@ -1014,7 +1017,7 @@ proc skip(L: var TLexer, tok: var TToken) =
       inc(pos)
       inc(tok.strongSpaceA)
     of '\t':
-      if not L.allowTabs: lexMessagePos(L, errTabulatorsAreNotAllowed, pos)
+      if not L.allowTabs: lexMessagePos(L, errGenerated, pos, "tabulators are not allowed")
       inc(pos)
     of CR, LF:
       tokenEndPrevious(tok, pos)
@@ -1119,7 +1122,7 @@ proc rawGetTok*(L: var TLexer, tok: var TToken) =
         tok.tokType = tkParLe
         when defined(nimsuggest):
           if L.fileIdx == gTrackPos.fileIndex and tok.col < gTrackPos.col and
-                    tok.line == gTrackPos.line.int and gIdeCmd == ideCon:
+                    tok.line == gTrackPos.line.int and L.config.ideCmd == ideCon:
             gTrackPos.col = tok.col.int16
     of ')':
       tok.tokType = tkParRi
@@ -1140,7 +1143,7 @@ proc rawGetTok*(L: var TLexer, tok: var TToken) =
     of '.':
       when defined(nimsuggest):
         if L.fileIdx == gTrackPos.fileIndex and tok.col+1 == gTrackPos.col and
-            tok.line == gTrackPos.line.int and gIdeCmd == ideSug:
+            tok.line == gTrackPos.line.int and L.config.ideCmd == ideSug:
           tok.tokType = tkDot
           L.cursor = CursorPosition.InToken
           gTrackPos.col = tok.col.int16
@@ -1182,7 +1185,7 @@ proc rawGetTok*(L: var TLexer, tok: var TToken) =
       else:
         tok.literal = $c
         tok.tokType = tkInvalid
-        lexMessage(L, errInvalidToken, c & " (\\" & $(ord(c)) & ')')
+        lexMessage(L, errGenerated, "invalid token: " & c & " (\\" & $(ord(c)) & ')')
     of '\"':
       # check for extended raw string literal:
       var rawMode = L.bufpos > 0 and L.buf[L.bufpos-1] in SymChars
@@ -1199,7 +1202,7 @@ proc rawGetTok*(L: var TLexer, tok: var TToken) =
       getNumber(L, tok)
       let c = L.buf[L.bufpos]
       if c in SymChars+{'_'}:
-        lexMessage(L, errInvalidToken, c & " (\\" & $(ord(c)) & ')')
+        lexMessage(L, errGenerated, "invalid token: no whitespace between number and identifier")
     else:
       if c in OpChars:
         getOperator(L, tok)
@@ -1209,6 +1212,6 @@ proc rawGetTok*(L: var TLexer, tok: var TToken) =
       else:
         tok.literal = $c
         tok.tokType = tkInvalid
-        lexMessage(L, errInvalidToken, c & " (\\" & $(ord(c)) & ')')
+        lexMessage(L, errGenerated, "invalid token: " & c & " (\\" & $(ord(c)) & ')')
         inc(L.bufpos)
   atTokenEnd()