some Token refactors (#21762)

* test some Token refactors
* fix CI
* showcase for more reductions, will revert
* Revert "showcase for more reductions, will revert"

  This reverts commit 5ba48591f4d53e8d83a27de8b03d26c6178dd3d1.
* make line and column int32
* remove int32 change
author: metagn <metagngn@gmail.com> 2023-05-06 22:27:28 +0300
committer: GitHub <noreply@github.com> 2023-05-06 21:27:28 +0200
commit: ba698d37c3254072a3bb946d3750ceaf450da7b4 (patch)
tree: 5d56dbca19232b4d2fc7b532a1b635a1b9298e0c
parent: d0c62fa169f3970653ce0d5bbd16e123efb24251 (diff)
download: Nim-ba698d37c3254072a3bb946d3750ceaf450da7b4.tar.gz
5 files changed, 28 insertions, 33 deletions
diff --git a/compiler/layouter.nim b/compiler/layouter.nim
index 6e8280e67..7cff98b11 100644
--- a/compiler/layouter.nim
+++ b/compiler/layouter.nim
@@ -510,7 +510,7 @@ proc emitTok*(em: var Emitter; L: Lexer; tok: Token) =
     rememberSplit(splitComma)
     wrSpace em
   of openPars:
-    if tok.strongSpaceA and not em.endsInWhite and
+    if tsLeading in tok.spacing and not em.endsInWhite and
         (not em.wasExportMarker or tok.tokType == tkCurlyDotLe):
       wrSpace em
     wr(em, $tok.tokType, ltSomeParLe)
@@ -528,7 +528,7 @@ proc emitTok*(em: var Emitter; L: Lexer; tok: Token) =
     wr(em, $tok.tokType, ltOther)
     if not em.inquote: wrSpace(em)
   of tkOpr, tkDotDot:
-    if em.inquote or (((not tok.strongSpaceA) and tok.strongSpaceB == tsNone) and
+    if em.inquote or (tok.spacing == {} and
         tok.ident.s notin ["<", ">", "<=", ">=", "==", "!="]):
       # bug #9504: remember to not spacify a keyword:
       lastTokWasTerse = true
@@ -538,7 +538,7 @@ proc emitTok*(em: var Emitter; L: Lexer; tok: Token) =
       if not em.endsInWhite: wrSpace(em)
       wr(em, tok.ident.s, ltOpr)
       template isUnary(tok): bool =
-        tok.strongSpaceB == tsNone and tok.strongSpaceA
+        tok.spacing == {tsLeading}
 
       if not isUnary(tok):
         rememberSplit(splitBinary)
diff --git a/compiler/lexer.nim b/compiler/lexer.nim
index a62d40e54..67dafc59f 100644
--- a/compiler/lexer.nim
+++ b/compiler/lexer.nim
@@ -94,19 +94,18 @@ type
     base2, base8, base16
 
   TokenSpacing* = enum
-    tsNone, tsTrailing, tsEof
+    tsLeading, tsTrailing, tsEof
 
   Token* = object                # a Nim token
     tokType*: TokType            # the type of the token
+    base*: NumericalBase         # the numerical base; only valid for int
+                                 # or float literals
+    spacing*: set[TokenSpacing]  # spaces around token
     indent*: int                 # the indentation; != -1 if the token has been
                                  # preceded with indentation
     ident*: PIdent               # the parsed identifier
     iNumber*: BiggestInt         # the parsed integer literal
     fNumber*: BiggestFloat       # the parsed floating point literal
-    base*: NumericalBase         # the numerical base; only valid for int
-                                 # or float literals
-    strongSpaceA*: bool          # leading spaces of an operator
-    strongSpaceB*: TokenSpacing  # trailing spaces of an operator
     literal*: string             # the parsed (string) literal; and
                                  # documentation comments are here too
     line*, col*: int
@@ -178,7 +177,7 @@ proc initToken*(L: var Token) =
   L.tokType = tkInvalid
   L.iNumber = 0
   L.indent = 0
-  L.strongSpaceA = false
+  L.spacing = {}
   L.literal = ""
   L.fNumber = 0.0
   L.base = base10
@@ -191,7 +190,7 @@ proc fillToken(L: var Token) =
   L.tokType = tkInvalid
   L.iNumber = 0
   L.indent = 0
-  L.strongSpaceA = false
+  L.spacing = {}
   setLen(L.literal, 0)
   L.fNumber = 0.0
   L.base = base10
@@ -960,13 +959,15 @@ proc getOperator(L: var Lexer, tok: var Token) =
   tokenEnd(tok, pos-1)
   # advance pos but don't store it in L.bufpos so the next token (which might
   # be an operator too) gets the preceding spaces:
-  tok.strongSpaceB = tsNone
+  tok.spacing = tok.spacing - {tsTrailing, tsEof}
+  var trailing = false
   while L.buf[pos] == ' ':
     inc pos
-    if tok.strongSpaceB != tsTrailing:
-      tok.strongSpaceB = tsTrailing
+    trailing = true
   if L.buf[pos] in {CR, LF, nimlexbase.EndOfFile}:
-    tok.strongSpaceB = tsEof
+    tok.spacing.incl(tsEof)
+  elif trailing:
+    tok.spacing.incl(tsTrailing)
 
 proc getPrecedence*(tok: Token): int =
   ## Calculates the precedence of the given token.
@@ -1077,7 +1078,6 @@ proc skipMultiLineComment(L: var Lexer; tok: var Token; start: int;
       when defined(nimpretty): tok.literal.add "\L"
       if isDoc:
         when not defined(nimpretty): tok.literal.add "\n"
-        inc tok.iNumber
         var c = toStrip
         while L.buf[pos] == ' ' and c > 0:
           inc pos
@@ -1096,8 +1096,6 @@ proc skipMultiLineComment(L: var Lexer; tok: var Token; start: int;
 proc scanComment(L: var Lexer, tok: var Token) =
   var pos = L.bufpos
   tok.tokType = tkComment
-  # iNumber contains the number of '\n' in the token
-  tok.iNumber = 0
   assert L.buf[pos+1] == '#'
   when defined(nimpretty):
     tok.commentOffsetA = L.offsetBase + pos
@@ -1140,7 +1138,6 @@ proc scanComment(L: var Lexer, tok: var Token) =
         while L.buf[pos] == ' ' and c > 0:
           inc pos
           dec c
-        inc tok.iNumber
     else:
       if L.buf[pos] > ' ':
         L.indentAhead = indent
@@ -1153,7 +1150,7 @@ proc scanComment(L: var Lexer, tok: var Token) =
 proc skip(L: var Lexer, tok: var Token) =
   var pos = L.bufpos
   tokenBegin(tok, pos)
-  tok.strongSpaceA = false
+  tok.spacing.excl(tsLeading)
   when defined(nimpretty):
     var hasComment = false
     var commentIndent = L.currLineIndent
@@ -1164,8 +1161,7 @@ proc skip(L: var Lexer, tok: var Token) =
     case L.buf[pos]
     of ' ':
       inc(pos)
-      if not tok.strongSpaceA:
-        tok.strongSpaceA = true
+      tok.spacing.incl(tsLeading)
     of '\t':
       if not L.allowTabs: lexMessagePos(L, errGenerated, pos, "tabs are not allowed, use spaces instead")
       inc(pos)
@@ -1187,7 +1183,7 @@ proc skip(L: var Lexer, tok: var Token) =
           pos = L.bufpos
         else:
           break
-      tok.strongSpaceA = false
+      tok.spacing.excl(tsLeading)
       when defined(nimpretty):
         if L.buf[pos] == '#' and tok.line < 0: commentIndent = indent
       if L.buf[pos] > ' ' and (L.buf[pos] != '#' or L.buf[pos+1] == '#'):
diff --git a/compiler/parser.nim b/compiler/parser.nim
index 26a442e23..babbb87fd 100644
--- a/compiler/parser.nim
+++ b/compiler/parser.nim
@@ -301,14 +301,13 @@ proc isRightAssociative(tok: Token): bool {.inline.} =
 proc isUnary(tok: Token): bool =
   ## Check if the given token is a unary operator
   tok.tokType in {tkOpr, tkDotDot} and
-  tok.strongSpaceB == tsNone and
-  tok.strongSpaceA
+  tok.spacing == {tsLeading}
 
 proc checkBinary(p: Parser) {.inline.} =
   ## Check if the current parser token is a binary operator.
   # we don't check '..' here as that's too annoying
   if p.tok.tokType == tkOpr:
-    if p.tok.strongSpaceB == tsTrailing and not p.tok.strongSpaceA:
+    if p.tok.spacing == {tsTrailing}:
       parMessage(p, warnInconsistentSpacing, prettyTok(p.tok))
 
 #| module = stmt ^* (';' / IND{=})
@@ -516,7 +515,7 @@ proc dotExpr(p: var Parser, a: PNode): PNode =
   optInd(p, result)
   result.add(a)
   result.add(parseSymbol(p, smAfterDot))
-  if p.tok.tokType == tkBracketLeColon and not p.tok.strongSpaceA:
+  if p.tok.tokType == tkBracketLeColon and tsLeading notin p.tok.spacing:
     var x = newNodeI(nkBracketExpr, p.parLineInfo)
     # rewrite 'x.y[:z]()' to 'y[z](x)'
     x.add result[1]
@@ -525,7 +524,7 @@ proc dotExpr(p: var Parser, a: PNode): PNode =
     var y = newNodeI(nkCall, p.parLineInfo)
     y.add x
     y.add result[0]
-    if p.tok.tokType == tkParLe and not p.tok.strongSpaceA:
+    if p.tok.tokType == tkParLe and tsLeading notin p.tok.spacing:
       exprColonEqExprListAux(p, tkParRi, y)
     result = y
 
@@ -883,7 +882,7 @@ proc primarySuffix(p: var Parser, r: PNode,
     case p.tok.tokType
     of tkParLe:
       # progress guaranteed
-      if p.tok.strongSpaceA:
+      if tsLeading in p.tok.spacing:
         result = commandExpr(p, result, mode)
         break
       result = namedParams(p, result, nkCall, tkParRi)
@@ -895,13 +894,13 @@ proc primarySuffix(p: var Parser, r: PNode,
       result = parseGStrLit(p, result)
     of tkBracketLe:
       # progress guaranteed
-      if p.tok.strongSpaceA:
+      if tsLeading in p.tok.spacing:
         result = commandExpr(p, result, mode)
         break
       result = namedParams(p, result, nkBracketExpr, tkBracketRi)
     of tkCurlyLe:
       # progress guaranteed
-      if p.tok.strongSpaceA:
+      if tsLeading in p.tok.spacing:
         result = commandExpr(p, result, mode)
         break
       result = namedParams(p, result, nkCurlyExpr, tkCurlyRi)
@@ -2525,7 +2524,7 @@ proc parseAll(p: var Parser): PNode =
   setEndInfo()
 
 proc checkFirstLineIndentation*(p: var Parser) =
-  if p.tok.indent != 0 and p.tok.strongSpaceA:
+  if p.tok.indent != 0 and tsLeading in p.tok.spacing:
     parMessage(p, errInvalidIndentation)
 
 proc parseTopLevelStmt(p: var Parser): PNode =
diff --git a/nimpretty/tests/exhaustive.nim b/nimpretty/tests/exhaustive.nim
index 53ff0ea4d..bcf825665 100644
--- a/nimpretty/tests/exhaustive.nim
+++ b/nimpretty/tests/exhaustive.nim
@@ -267,7 +267,7 @@ proc emitTok*(em: var Emitter; L: TLexer; tok: TToken) =
     if not em.endsInWhite: wr(" ")
     wr(tok.ident.s)
     template isUnary(tok): bool =
-      tok.strongSpaceB == tsNone and tok.strongSpaceA
+      tok.spacing == {tsLeading}
 
     if not isUnary(tok) or em.lastTok in {tkOpr, tkDotDot}:
       wr(" ")
diff --git a/nimpretty/tests/expected/exhaustive.nim b/nimpretty/tests/expected/exhaustive.nim
index 266bcae06..50ae92a62 100644
--- a/nimpretty/tests/expected/exhaustive.nim
+++ b/nimpretty/tests/expected/exhaustive.nim
@@ -272,7 +272,7 @@ proc emitTok*(em: var Emitter; L: TLexer; tok: TToken) =
     if not em.endsInWhite: wr(" ")
     wr(tok.ident.s)
     template isUnary(tok): bool =
-      tok.strongSpaceB == tsNone and tok.strongSpaceA
+      tok.spacing == {tsLeading}
 
     if not isUnary(tok) or em.lastTok in {tkOpr, tkDotDot}:
       wr(" ")
author	metagn <metagngn@gmail.com>	2023-05-06 22:27:28 +0300
committer	GitHub <noreply@github.com>	2023-05-06 21:27:28 +0200
commit	ba698d37c3254072a3bb946d3750ceaf450da7b4 (patch)
tree	5d56dbca19232b4d2fc7b532a1b635a1b9298e0c
parent	d0c62fa169f3970653ce0d5bbd16e123efb24251 (diff)
download	Nim-ba698d37c3254072a3bb946d3750ceaf450da7b4.tar.gz