implements strongSpaces parsing mode

author: Araq <rumpf_a@web.de> 2014-03-07 22:25:05 +0100
committer: Araq <rumpf_a@web.de> 2014-03-07 22:25:05 +0100
commit: 91d842e1ec070a9ab7f883820bd6244526f5d622 (patch)
tree: 820f09d7ddb69b01ef6a27b43ea9de5ff0df2b0a
parent: 7904446c47f952a026d408f04431dbaf6d887b37 (diff)
download: Nim-91d842e1ec070a9ab7f883820bd6244526f5d622.tar.gz
8 files changed, 123 insertions, 43 deletions
diff --git a/compiler/canonicalizer.nim b/compiler/canonicalizer.nim
index 07e932b28..3bc4eb029 100644
--- a/compiler/canonicalizer.nim
+++ b/compiler/canonicalizer.nim
@@ -81,7 +81,7 @@ proc hashSym(c: var MD5Context, s: PSym) =
 
 proc hashTree(c: var MD5Context, n: PNode) =
   if n == nil:
-    c &= "noTreeKind"
+    c &= "\255"
     return
   var k = n.kind
   md5Update(c, cast[cstring](addr(k)), 1)
@@ -107,7 +107,7 @@ proc hashTree(c: var MD5Context, n: PNode) =
 proc hashType(c: var MD5Context, t: PType) =
   # modelled after 'typeToString'
   if t == nil: 
-    c &= "noTypeKind"
+    c &= "\254"
     return
 
   var k = t.kind
@@ -168,7 +168,7 @@ proc canonConst(n: PNode): TUid =
   c.hashType(n.typ)
   md5Final(c, MD5Digest(result))
 
-proc canonSym(s: PSym): TUid
+proc canonSym(s: PSym): TUid =
   var c: MD5Context
   md5Init(c)
   c.hashSym(s)
diff --git a/compiler/lexer.nim b/compiler/lexer.nim
index 0e7df13cd..9c6c5e22f 100644
--- a/compiler/lexer.nim
+++ b/compiler/lexer.nim
@@ -110,6 +110,8 @@ type
     fNumber*: BiggestFloat    # the parsed floating point literal
     base*: TNumericalBase     # the numerical base; only valid for int
                               # or float literals
+    strongSpaceA*: int8       # leading spaces of an operator
+    strongSpaceB*: int8       # trailing spaces of an operator
     literal*: string          # the parsed (string) literal; and
                               # documentation comments are here too
     line*, col*: int
@@ -119,6 +121,7 @@ type
     indentAhead*: int         # if > 0 an indendation has already been read
                               # this is needed because scanning comments
                               # needs so much look-ahead
+    strongSpaces*: bool
   
 
 var gLinesCompiled*: int  # all lines that have been compiled
@@ -183,6 +186,7 @@ proc initToken*(L: var TToken) =
   L.tokType = tkInvalid
   L.iNumber = 0
   L.indent = 0
+  L.strongSpaceA = 0
   L.literal = ""
   L.fNumber = 0.0
   L.base = base10
@@ -192,6 +196,7 @@ proc fillToken(L: var TToken) =
   L.tokType = tkInvalid
   L.iNumber = 0
   L.indent = 0
+  L.strongSpaceA = 0
   setLen(L.literal, 0)
   L.fNumber = 0.0
   L.base = base10
@@ -634,6 +639,14 @@ proc getOperator(L: var TLexer, tok: var TToken) =
     h = h !& ord(c)
     inc(pos)
   endOperator(L, tok, pos, h)
+  # advance pos but don't store it in L.bufpos so the next token (which might
+  # be an operator too) gets the preceeding spaces:
+  tok.strongSpaceB = 0
+  while buf[pos] == ' ':
+    inc pos
+    inc tok.strongSpaceB
+  if buf[pos] in {CR, LF, nimlexbase.EndOfFile}:
+    tok.strongSpaceB = -1
 
 proc scanComment(L: var TLexer, tok: var TToken) = 
   var pos = L.bufpos
@@ -677,10 +690,12 @@ proc scanComment(L: var TLexer, tok: var TToken) =
 proc skip(L: var TLexer, tok: var TToken) =
   var pos = L.bufpos
   var buf = L.buf
+  tok.strongSpaceA = 0
   while true:
     case buf[pos]
     of ' ':
       inc(pos)
+      inc(tok.strongSpaceA)
     of Tabulator:
       lexMessagePos(L, errTabulatorsAreNotAllowed, pos)
       inc(pos)
@@ -691,6 +706,7 @@ proc skip(L: var TLexer, tok: var TToken) =
       while buf[pos] == ' ':
         inc(pos)
         inc(indent)
+      tok.strongSpaceA = 0
       if buf[pos] > ' ':
         tok.indent = indent
         break
diff --git a/compiler/parser.nim b/compiler/parser.nim
index 5a5bfb574..cfba89f4a 100644
--- a/compiler/parser.nim
+++ b/compiler/parser.nim
@@ -38,7 +38,6 @@ type
     inSemiStmtList: int
 
 proc parseAll*(p: var TParser): PNode
-proc openParser*(p: var TParser, filename: string, inputstream: PLLStream)
 proc closeParser*(p: var TParser)
 proc parseTopLevelStmt*(p: var TParser): PNode
   # implements an iterator. Returns the next top-level statement or
@@ -50,7 +49,6 @@ proc parseString*(s: string, filename: string = "", line: int = 0): PNode
   # correct error messages referring to the original source.
   
 # helpers for the other parsers
-proc getPrecedence*(tok: TToken): int
 proc isOperator*(tok: TToken): bool
 proc getTok*(p: var TParser)
 proc parMessage*(p: TParser, msg: TMsgKind, arg: string = "")
@@ -77,14 +75,17 @@ proc parseCase(p: var TParser): PNode
 proc getTok(p: var TParser) = 
   rawGetTok(p.lex, p.tok)
 
-proc openParser*(p: var TParser, fileIdx: int32, inputStream: PLLStream) =
+proc openParser*(p: var TParser, fileIdx: int32, inputStream: PLLStream,
+                 strongSpaces=false) =
   initToken(p.tok)
   openLexer(p.lex, fileIdx, inputStream)
   getTok(p)                   # read the first token
   p.firstTok = true
+  p.strongSpaces = strongSpaces
 
-proc openParser*(p: var TParser, filename: string, inputStream: PLLStream) =
-  openParser(p, filename.fileInfoIdx, inputstream)
+proc openParser*(p: var TParser, filename: string, inputStream: PLLStream,
+                 strongSpaces=false) =
+  openParser(p, filename.fileInfoIdx, inputstream, strongSpaces)
 
 proc closeParser(p: var TParser) = 
   closeLexer(p.lex)
@@ -193,34 +194,52 @@ proc isSigilLike(tok: TToken): bool {.inline.} =
 proc isLeftAssociative(tok: TToken): bool {.inline.} =
   result = tok.tokType != tkOpr or relevantOprChar(tok.ident) != '^'
 
-proc getPrecedence(tok: TToken): int = 
+proc getPrecedence(tok: TToken, strongSpaces: bool): int =
+  template considerStrongSpaces(x): expr =
+    x + (if strongSpaces: 100 - tok.strongSpaceA.int*10 else: 0)
+
   case tok.tokType
   of tkOpr:
     let L = tok.ident.s.len
     let relevantChar = relevantOprChar(tok.ident)
     
-    template considerAsgn(value: expr) = 
-      result = if tok.ident.s[L-1] == '=': 1 else: value     
+    template considerAsgn(value: expr) =
+      result = if tok.ident.s[L-1] == '=': 1 else: considerStrongSpaces(value)
     
     case relevantChar
     of '$', '^': considerAsgn(10)
     of '*', '%', '/', '\\': considerAsgn(9)
-    of '~': result = 8
+    of '~': result = considerStrongSpaces(8)
     of '+', '-', '|': considerAsgn(8)
     of '&': considerAsgn(7)
-    of '=', '<', '>', '!': result = 5
+    of '=', '<', '>', '!': result = considerStrongSpaces(5)
     of '.': considerAsgn(6)
-    of '?': result = 2
+    of '?': result = considerStrongSpaces(2)
     else: considerAsgn(2)
   of tkDiv, tkMod, tkShl, tkShr: result = 9
   of tkIn, tkNotin, tkIs, tkIsnot, tkNot, tkOf, tkAs: result = 5
-  of tkDotDot: result = 6
+  of tkDotDot: result = considerStrongSpaces(6)
   of tkAnd: result = 4
   of tkOr, tkXor: result = 3
-  else: result = - 10
-  
-proc isOperator(tok: TToken): bool = 
-  result = getPrecedence(tok) >= 0
+  else: result = -10
+
+proc isOperator(tok: TToken): bool =
+  tok.tokType in {tkOpr, tkDiv, tkMod, tkShl, tkShr, tkIn, tkNotin, tkIs,
+                  tkIsnot, tkNot, tkOf, tkAs, tkDotDot, tkAnd, tkOr, tkXor}
+
+proc isUnary(p: TParser): bool =
+  p.strongSpaces and p.tok.tokType in {tkOpr, tkDotDot} and
+    p.tok.strongSpaceB == 0 and
+    p.tok.strongSpaceA > 0
+
+proc checkBinary(p: TParser) {.inline.} =
+  # we don't check '..' here as that's too annoying
+  if p.strongSpaces and p.tok.tokType == tkOpr:
+    if p.tok.strongSpaceB > 0 and p.tok.strongSpaceA != p.tok.strongSpaceB:
+      parMessage(p, errGenerated, "number of spaces around '$#' not consistent"%
+        prettyTok(p.tok))
+    elif p.tok.strongSpaceA notin {0,1,2,4,8}:
+      parMessage(p, errGenerated, "number of spaces must be 0,1,2,4 or 8")
 
 #| module = stmt ^* (';' / IND{=})
 #|
@@ -650,6 +669,7 @@ proc primarySuffix(p: var TParser, r: PNode): PNode =
   while p.tok.indent < 0:
     case p.tok.tokType
     of tkParLe:
+      if p.strongSpaces and p.tok.strongSpaceA > 0: break
       result = namedParams(p, result, nkCall, tkParRi)
       if result.len > 1 and result.sons[1].kind == nkExprColonExpr:
         result.kind = nkObjConstr
@@ -664,8 +684,10 @@ proc primarySuffix(p: var TParser, r: PNode): PNode =
       result = dotExpr(p, result)
       result = parseGStrLit(p, result)
     of tkBracketLe:
+      if p.strongSpaces and p.tok.strongSpaceA > 0: break
       result = namedParams(p, result, nkBracketExpr, tkBracketRi)
     of tkCurlyLe:
+      if p.strongSpaces and p.tok.strongSpaceA > 0: break
       result = namedParams(p, result, nkCurlyExpr, tkCurlyRi)
     of tkSymbol, tkAccent, tkIntLit..tkCharLit, tkNil, tkCast:
       if p.inPragma == 0:
@@ -695,10 +717,11 @@ proc primary(p: var TParser, mode: TPrimaryMode): PNode
 proc simpleExprAux(p: var TParser, limit: int, mode: TPrimaryMode): PNode =
   result = primary(p, mode)
   # expand while operators have priorities higher than 'limit'
-  var opPrec = getPrecedence(p.tok)
+  var opPrec = getPrecedence(p.tok, p.strongSpaces)
   let modeB = if mode == pmTypeDef: pmTypeDesc else: mode
   # the operator itself must not start on a new line:
-  while opPrec >= limit and p.tok.indent < 0:
+  while opPrec >= limit and p.tok.indent < 0 and not isUnary(p):
+    checkBinary(p)
     var leftAssoc = ord(isLeftAssociative(p.tok))
     var a = newNodeP(nkInfix, p)
     var opNode = newIdentNodeP(p.tok.ident, p) # skip operator:
@@ -710,7 +733,7 @@ proc simpleExprAux(p: var TParser, limit: int, mode: TPrimaryMode): PNode =
     addSon(a, result)
     addSon(a, b)
     result = a
-    opPrec = getPrecedence(p.tok)
+    opPrec = getPrecedence(p.tok, p.strongSpaces)
   
 proc simpleExpr(p: var TParser, mode = pmNormal): PNode =
   result = simpleExprAux(p, -1, mode)
@@ -1933,7 +1956,9 @@ proc parseString(s: string, filename: string = "", line: int = 0): PNode =
   stream.lineOffset = line
 
   var parser: TParser
-  openParser(parser, filename, stream)
+  # XXX for now the builtin 'parseStmt/Expr' functions do not know about strong
+  # spaces...
+  openParser(parser, filename, stream, false)
 
   result = parser.parseAll
   closeParser(parser)
diff --git a/compiler/pragmas.nim b/compiler/pragmas.nim
index bf3564016..f5d69a01c 100644
--- a/compiler/pragmas.nim
+++ b/compiler/pragmas.nim
@@ -97,8 +97,6 @@ proc makeExternImport(s: PSym, extname: string) =
   incl(s.flags, sfImportc)
   excl(s.flags, sfForward)
 
-const invalidIdentChars = AllChars - IdentChars
-
 proc validateExternCName(s: PSym, info: TLineInfo) =
   ## Validates that the symbol name in s.loc.r is a valid C identifier.
   ##
@@ -106,16 +104,14 @@ proc validateExternCName(s: PSym, info: TLineInfo) =
   ## starting with a number. If the check fails, a generic error will be
   ## displayed to the user.
   let target = ropeToStr(s.loc.r)
-  if target.len < 1 or (not (target[0] in IdentStartChars)) or
-      (not target.allCharsInSet(IdentChars)):
+  if target.len < 1 or target[0] notin IdentStartChars or
+      not target.allCharsInSet(IdentChars):
     localError(info, errGenerated, "invalid exported symbol")
 
 proc makeExternExport(s: PSym, extname: string, info: TLineInfo) =
   setExternName(s, extname)
-  case gCmd
-  of cmdCompileToC, cmdCompileToCpp, cmdCompileToOC:
+  if gCmd in {cmdCompileToC, cmdCompileToCpp, cmdCompileToOC}:
     validateExternCName(s, info)
-  else: discard
   incl(s.flags, sfExportc)
 
 proc processImportCompilerProc(s: PSym, extname: string) =
diff --git a/compiler/syntaxes.nim b/compiler/syntaxes.nim
index 7c44ec0b4..478c2a837 100644
--- a/compiler/syntaxes.nim
+++ b/compiler/syntaxes.nim
@@ -17,14 +17,15 @@ type
   TFilterKind* = enum 
     filtNone, filtTemplate, filtReplace, filtStrip
   TParserKind* = enum 
-    skinStandard, skinBraces, skinEndX
+    skinStandard, skinStrongSpaces, skinBraces, skinEndX
 
 const 
-  parserNames*: array[TParserKind, string] = ["standard", "braces", "endx"]
-  filterNames*: array[TFilterKind, string] = ["none", "stdtmpl", "replace", 
-    "strip"]
+  parserNames*: array[TParserKind, string] = ["standard", "strongspaces",
+                                              "braces", "endx"]
+  filterNames*: array[TFilterKind, string] = ["none", "stdtmpl", "replace",
+                                              "strip"]
 
-type 
+type
   TParsers*{.final.} = object 
     skin*: TParserKind
     parser*: TParser
@@ -54,7 +55,7 @@ proc parseFile(fileIdx: int32): PNode =
 
 proc parseAll(p: var TParsers): PNode = 
   case p.skin
-  of skinStandard: 
+  of skinStandard, skinStrongSpaces:
     result = parser.parseAll(p.parser)
   of skinBraces: 
     result = pbraces.parseAll(p.parser)
@@ -65,7 +66,7 @@ proc parseAll(p: var TParsers): PNode =
   
 proc parseTopLevelStmt(p: var TParsers): PNode = 
   case p.skin
-  of skinStandard: 
+  of skinStandard, skinStrongSpaces:
     result = parser.parseTopLevelStmt(p.parser)
   of skinBraces: 
     result = pbraces.parseTopLevelStmt(p.parser)
@@ -170,7 +171,9 @@ proc openParsers(p: var TParsers, fileIdx: int32, inputstream: PLLStream) =
   else: s = inputstream
   case p.skin
   of skinStandard, skinBraces, skinEndX:
-    parser.openParser(p.parser, fileIdx, s)
+    parser.openParser(p.parser, fileIdx, s, false)
+  of skinStrongSpaces:
+    parser.openParser(p.parser, fileIdx, s, true)
   
-proc closeParsers(p: var TParsers) = 
+proc closeParsers(p: var TParsers) =
   parser.closeParser(p.parser)
diff --git a/doc/manual.txt b/doc/manual.txt
index 98219360e..1c6cf6c1d 100644
--- a/doc/manual.txt
+++ b/doc/manual.txt
@@ -480,8 +480,8 @@ precedence and associativity; this is useful for meta programming.
 Associativity
 -------------
 
-All binary operators are left-associative, except binary operators whose
-relevant char is ``^``.
+Binary operators whose relevant character is ``^`` are right-associative, all
+other binary operators are left-associative.
 
 Precedence
 ----------
@@ -508,7 +508,7 @@ Precedence level    Operators                                      Relevant char
   7               ``+    -``                                       ``+  ~  |``         OP7
   6               ``&``                                            ``&``               OP6
   5               ``..``                                           ``.``               OP5
-  4               ``==  <= < >= > !=  in not_in is isnot not of``  ``= <  > !``        OP4
+  4               ``==  <= < >= > !=  in notin is isnot not of``   ``= <  > !``        OP4
   3               ``and``                                                              OP3
   2               ``or xor``                                                           OP2
   1                                                                ``@  : ?``          OP1
@@ -516,6 +516,46 @@ Precedence level    Operators                                      Relevant char
 ================  ===============================================  ==================  ===============
 
 
+Strong spaces
+-------------
+
+The number of spaces preceeding a non-keyword operator affects precedence
+if the experimental parser directive ``#!strongSpaces`` is used. Indentation
+is not used to determine the number of spaces. If 2 or more operators have the
+same number of preceeding spaces the precedence table applies, so ``1 + 3 * 4``
+is still parsed as ``1 + (3 * 4)``, but ``1+3 * 4`` is parsed as ``(1+3) * 4``:
+
+.. code-block:: nimrod
+  #! strongSpaces
+  if foo+4 * 4 == 8 and b&c | 9  ++
+      bar:
+    echo ""
+  # is parsed as
+  if ((foo+4)*4 == 8) and (((b&c) | 9) ++ bar): echo ""
+
+
+Furthermore whether an operator is used a prefix operator is affected by the
+number of spaces: 
+
+.. code-block:: nimrod
+  #! strongSpaces
+  echo $foo
+  # is parsed as
+  echo($foo)
+
+This also affects whether ``[]``, ``{}``, ``()`` are parsed as constructors
+or as accessors:
+
+.. code-block:: nimrod
+  #! strongSpaces
+  echo (1,2)
+  # is parsed as
+  echo((1,2))
+
+
+Grammar
+-------
+
 The grammar's start symbol is ``module``.
 
 .. include:: grammar.txt
diff --git a/todo.txt b/todo.txt
index a67ff5172..51f883d1d 100644
--- a/todo.txt
+++ b/todo.txt
@@ -32,7 +32,6 @@ version 0.9.x
 - ensure (ref T)(a, b) works as a type conversion and type constructor
 - optimize 'genericReset'; 'newException' leads to code bloat
 - stack-less GC
-- implement strongSpaces:on
 - make '--implicitStatic:on' the default
 - implicit deref for parameter matching
 
diff --git a/web/news.txt b/web/news.txt
index 0001cece7..83863cdd9 100644
--- a/web/news.txt
+++ b/web/news.txt
@@ -82,6 +82,7 @@ News
     - The *command syntax* is supported in a lot more contexts.
     - Anonymous iterators are now supported and iterators can capture variables
       of an outer proc.
+    - The experimental ``strongSpaces`` parsing mode has been implemented.
 
 
     Tools improvements
author	Araq <rumpf_a@web.de>	2014-03-07 22:25:05 +0100
committer	Araq <rumpf_a@web.de>	2014-03-07 22:25:05 +0100
commit	91d842e1ec070a9ab7f883820bd6244526f5d622 (patch)
tree	820f09d7ddb69b01ef6a27b43ea9de5ff0df2b0a
parent	7904446c47f952a026d408f04431dbaf6d887b37 (diff)
download	Nim-91d842e1ec070a9ab7f883820bd6244526f5d622.tar.gz