diff options
author | Araq <rumpf_a@web.de> | 2014-03-07 22:25:05 +0100 |
---|---|---|
committer | Araq <rumpf_a@web.de> | 2014-03-07 22:25:05 +0100 |
commit | 91d842e1ec070a9ab7f883820bd6244526f5d622 (patch) | |
tree | 820f09d7ddb69b01ef6a27b43ea9de5ff0df2b0a | |
parent | 7904446c47f952a026d408f04431dbaf6d887b37 (diff) | |
download | Nim-91d842e1ec070a9ab7f883820bd6244526f5d622.tar.gz |
implements strongSpaces parsing mode
-rw-r--r-- | compiler/canonicalizer.nim | 6 | ||||
-rw-r--r-- | compiler/lexer.nim | 16 | ||||
-rw-r--r-- | compiler/parser.nim | 65 | ||||
-rw-r--r-- | compiler/pragmas.nim | 10 | ||||
-rw-r--r-- | compiler/syntaxes.nim | 21 | ||||
-rw-r--r-- | doc/manual.txt | 46 | ||||
-rw-r--r-- | todo.txt | 1 | ||||
-rw-r--r-- | web/news.txt | 1 |
8 files changed, 123 insertions, 43 deletions
diff --git a/compiler/canonicalizer.nim b/compiler/canonicalizer.nim index 07e932b28..3bc4eb029 100644 --- a/compiler/canonicalizer.nim +++ b/compiler/canonicalizer.nim @@ -81,7 +81,7 @@ proc hashSym(c: var MD5Context, s: PSym) = proc hashTree(c: var MD5Context, n: PNode) = if n == nil: - c &= "noTreeKind" + c &= "\255" return var k = n.kind md5Update(c, cast[cstring](addr(k)), 1) @@ -107,7 +107,7 @@ proc hashTree(c: var MD5Context, n: PNode) = proc hashType(c: var MD5Context, t: PType) = # modelled after 'typeToString' if t == nil: - c &= "noTypeKind" + c &= "\254" return var k = t.kind @@ -168,7 +168,7 @@ proc canonConst(n: PNode): TUid = c.hashType(n.typ) md5Final(c, MD5Digest(result)) -proc canonSym(s: PSym): TUid +proc canonSym(s: PSym): TUid = var c: MD5Context md5Init(c) c.hashSym(s) diff --git a/compiler/lexer.nim b/compiler/lexer.nim index 0e7df13cd..9c6c5e22f 100644 --- a/compiler/lexer.nim +++ b/compiler/lexer.nim @@ -110,6 +110,8 @@ type fNumber*: BiggestFloat # the parsed floating point literal base*: TNumericalBase # the numerical base; only valid for int # or float literals + strongSpaceA*: int8 # leading spaces of an operator + strongSpaceB*: int8 # trailing spaces of an operator literal*: string # the parsed (string) literal; and # documentation comments are here too line*, col*: int @@ -119,6 +121,7 @@ type indentAhead*: int # if > 0 an indendation has already been read # this is needed because scanning comments # needs so much look-ahead + strongSpaces*: bool var gLinesCompiled*: int # all lines that have been compiled @@ -183,6 +186,7 @@ proc initToken*(L: var TToken) = L.tokType = tkInvalid L.iNumber = 0 L.indent = 0 + L.strongSpaceA = 0 L.literal = "" L.fNumber = 0.0 L.base = base10 @@ -192,6 +196,7 @@ proc fillToken(L: var TToken) = L.tokType = tkInvalid L.iNumber = 0 L.indent = 0 + L.strongSpaceA = 0 setLen(L.literal, 0) L.fNumber = 0.0 L.base = base10 @@ -634,6 +639,14 @@ proc getOperator(L: var TLexer, tok: var TToken) = h = h !& ord(c) inc(pos) endOperator(L, tok, pos, h) + # advance pos but don't store it in L.bufpos so the next token (which might + # be an operator too) gets the preceeding spaces: + tok.strongSpaceB = 0 + while buf[pos] == ' ': + inc pos + inc tok.strongSpaceB + if buf[pos] in {CR, LF, nimlexbase.EndOfFile}: + tok.strongSpaceB = -1 proc scanComment(L: var TLexer, tok: var TToken) = var pos = L.bufpos @@ -677,10 +690,12 @@ proc scanComment(L: var TLexer, tok: var TToken) = proc skip(L: var TLexer, tok: var TToken) = var pos = L.bufpos var buf = L.buf + tok.strongSpaceA = 0 while true: case buf[pos] of ' ': inc(pos) + inc(tok.strongSpaceA) of Tabulator: lexMessagePos(L, errTabulatorsAreNotAllowed, pos) inc(pos) @@ -691,6 +706,7 @@ proc skip(L: var TLexer, tok: var TToken) = while buf[pos] == ' ': inc(pos) inc(indent) + tok.strongSpaceA = 0 if buf[pos] > ' ': tok.indent = indent break diff --git a/compiler/parser.nim b/compiler/parser.nim index 5a5bfb574..cfba89f4a 100644 --- a/compiler/parser.nim +++ b/compiler/parser.nim @@ -38,7 +38,6 @@ type inSemiStmtList: int proc parseAll*(p: var TParser): PNode -proc openParser*(p: var TParser, filename: string, inputstream: PLLStream) proc closeParser*(p: var TParser) proc parseTopLevelStmt*(p: var TParser): PNode # implements an iterator. Returns the next top-level statement or @@ -50,7 +49,6 @@ proc parseString*(s: string, filename: string = "", line: int = 0): PNode # correct error messages referring to the original source. # helpers for the other parsers -proc getPrecedence*(tok: TToken): int proc isOperator*(tok: TToken): bool proc getTok*(p: var TParser) proc parMessage*(p: TParser, msg: TMsgKind, arg: string = "") @@ -77,14 +75,17 @@ proc parseCase(p: var TParser): PNode proc getTok(p: var TParser) = rawGetTok(p.lex, p.tok) -proc openParser*(p: var TParser, fileIdx: int32, inputStream: PLLStream) = +proc openParser*(p: var TParser, fileIdx: int32, inputStream: PLLStream, + strongSpaces=false) = initToken(p.tok) openLexer(p.lex, fileIdx, inputStream) getTok(p) # read the first token p.firstTok = true + p.strongSpaces = strongSpaces -proc openParser*(p: var TParser, filename: string, inputStream: PLLStream) = - openParser(p, filename.fileInfoIdx, inputstream) +proc openParser*(p: var TParser, filename: string, inputStream: PLLStream, + strongSpaces=false) = + openParser(p, filename.fileInfoIdx, inputstream, strongSpaces) proc closeParser(p: var TParser) = closeLexer(p.lex) @@ -193,34 +194,52 @@ proc isSigilLike(tok: TToken): bool {.inline.} = proc isLeftAssociative(tok: TToken): bool {.inline.} = result = tok.tokType != tkOpr or relevantOprChar(tok.ident) != '^' -proc getPrecedence(tok: TToken): int = +proc getPrecedence(tok: TToken, strongSpaces: bool): int = + template considerStrongSpaces(x): expr = + x + (if strongSpaces: 100 - tok.strongSpaceA.int*10 else: 0) + case tok.tokType of tkOpr: let L = tok.ident.s.len let relevantChar = relevantOprChar(tok.ident) - template considerAsgn(value: expr) = - result = if tok.ident.s[L-1] == '=': 1 else: value + template considerAsgn(value: expr) = + result = if tok.ident.s[L-1] == '=': 1 else: considerStrongSpaces(value) case relevantChar of '$', '^': considerAsgn(10) of '*', '%', '/', '\\': considerAsgn(9) - of '~': result = 8 + of '~': result = considerStrongSpaces(8) of '+', '-', '|': considerAsgn(8) of '&': considerAsgn(7) - of '=', '<', '>', '!': result = 5 + of '=', '<', '>', '!': result = considerStrongSpaces(5) of '.': considerAsgn(6) - of '?': result = 2 + of '?': result = considerStrongSpaces(2) else: considerAsgn(2) of tkDiv, tkMod, tkShl, tkShr: result = 9 of tkIn, tkNotin, tkIs, tkIsnot, tkNot, tkOf, tkAs: result = 5 - of tkDotDot: result = 6 + of tkDotDot: result = considerStrongSpaces(6) of tkAnd: result = 4 of tkOr, tkXor: result = 3 - else: result = - 10 - -proc isOperator(tok: TToken): bool = - result = getPrecedence(tok) >= 0 + else: result = -10 + +proc isOperator(tok: TToken): bool = + tok.tokType in {tkOpr, tkDiv, tkMod, tkShl, tkShr, tkIn, tkNotin, tkIs, + tkIsnot, tkNot, tkOf, tkAs, tkDotDot, tkAnd, tkOr, tkXor} + +proc isUnary(p: TParser): bool = + p.strongSpaces and p.tok.tokType in {tkOpr, tkDotDot} and + p.tok.strongSpaceB == 0 and + p.tok.strongSpaceA > 0 + +proc checkBinary(p: TParser) {.inline.} = + # we don't check '..' here as that's too annoying + if p.strongSpaces and p.tok.tokType == tkOpr: + if p.tok.strongSpaceB > 0 and p.tok.strongSpaceA != p.tok.strongSpaceB: + parMessage(p, errGenerated, "number of spaces around '$#' not consistent"% + prettyTok(p.tok)) + elif p.tok.strongSpaceA notin {0,1,2,4,8}: + parMessage(p, errGenerated, "number of spaces must be 0,1,2,4 or 8") #| module = stmt ^* (';' / IND{=}) #| @@ -650,6 +669,7 @@ proc primarySuffix(p: var TParser, r: PNode): PNode = while p.tok.indent < 0: case p.tok.tokType of tkParLe: + if p.strongSpaces and p.tok.strongSpaceA > 0: break result = namedParams(p, result, nkCall, tkParRi) if result.len > 1 and result.sons[1].kind == nkExprColonExpr: result.kind = nkObjConstr @@ -664,8 +684,10 @@ proc primarySuffix(p: var TParser, r: PNode): PNode = result = dotExpr(p, result) result = parseGStrLit(p, result) of tkBracketLe: + if p.strongSpaces and p.tok.strongSpaceA > 0: break result = namedParams(p, result, nkBracketExpr, tkBracketRi) of tkCurlyLe: + if p.strongSpaces and p.tok.strongSpaceA > 0: break result = namedParams(p, result, nkCurlyExpr, tkCurlyRi) of tkSymbol, tkAccent, tkIntLit..tkCharLit, tkNil, tkCast: if p.inPragma == 0: @@ -695,10 +717,11 @@ proc primary(p: var TParser, mode: TPrimaryMode): PNode proc simpleExprAux(p: var TParser, limit: int, mode: TPrimaryMode): PNode = result = primary(p, mode) # expand while operators have priorities higher than 'limit' - var opPrec = getPrecedence(p.tok) + var opPrec = getPrecedence(p.tok, p.strongSpaces) let modeB = if mode == pmTypeDef: pmTypeDesc else: mode # the operator itself must not start on a new line: - while opPrec >= limit and p.tok.indent < 0: + while opPrec >= limit and p.tok.indent < 0 and not isUnary(p): + checkBinary(p) var leftAssoc = ord(isLeftAssociative(p.tok)) var a = newNodeP(nkInfix, p) var opNode = newIdentNodeP(p.tok.ident, p) # skip operator: @@ -710,7 +733,7 @@ proc simpleExprAux(p: var TParser, limit: int, mode: TPrimaryMode): PNode = addSon(a, result) addSon(a, b) result = a - opPrec = getPrecedence(p.tok) + opPrec = getPrecedence(p.tok, p.strongSpaces) proc simpleExpr(p: var TParser, mode = pmNormal): PNode = result = simpleExprAux(p, -1, mode) @@ -1933,7 +1956,9 @@ proc parseString(s: string, filename: string = "", line: int = 0): PNode = stream.lineOffset = line var parser: TParser - openParser(parser, filename, stream) + # XXX for now the builtin 'parseStmt/Expr' functions do not know about strong + # spaces... + openParser(parser, filename, stream, false) result = parser.parseAll closeParser(parser) diff --git a/compiler/pragmas.nim b/compiler/pragmas.nim index bf3564016..f5d69a01c 100644 --- a/compiler/pragmas.nim +++ b/compiler/pragmas.nim @@ -97,8 +97,6 @@ proc makeExternImport(s: PSym, extname: string) = incl(s.flags, sfImportc) excl(s.flags, sfForward) -const invalidIdentChars = AllChars - IdentChars - proc validateExternCName(s: PSym, info: TLineInfo) = ## Validates that the symbol name in s.loc.r is a valid C identifier. ## @@ -106,16 +104,14 @@ proc validateExternCName(s: PSym, info: TLineInfo) = ## starting with a number. If the check fails, a generic error will be ## displayed to the user. let target = ropeToStr(s.loc.r) - if target.len < 1 or (not (target[0] in IdentStartChars)) or - (not target.allCharsInSet(IdentChars)): + if target.len < 1 or target[0] notin IdentStartChars or + not target.allCharsInSet(IdentChars): localError(info, errGenerated, "invalid exported symbol") proc makeExternExport(s: PSym, extname: string, info: TLineInfo) = setExternName(s, extname) - case gCmd - of cmdCompileToC, cmdCompileToCpp, cmdCompileToOC: + if gCmd in {cmdCompileToC, cmdCompileToCpp, cmdCompileToOC}: validateExternCName(s, info) - else: discard incl(s.flags, sfExportc) proc processImportCompilerProc(s: PSym, extname: string) = diff --git a/compiler/syntaxes.nim b/compiler/syntaxes.nim index 7c44ec0b4..478c2a837 100644 --- a/compiler/syntaxes.nim +++ b/compiler/syntaxes.nim @@ -17,14 +17,15 @@ type TFilterKind* = enum filtNone, filtTemplate, filtReplace, filtStrip TParserKind* = enum - skinStandard, skinBraces, skinEndX + skinStandard, skinStrongSpaces, skinBraces, skinEndX const - parserNames*: array[TParserKind, string] = ["standard", "braces", "endx"] - filterNames*: array[TFilterKind, string] = ["none", "stdtmpl", "replace", - "strip"] + parserNames*: array[TParserKind, string] = ["standard", "strongspaces", + "braces", "endx"] + filterNames*: array[TFilterKind, string] = ["none", "stdtmpl", "replace", + "strip"] -type +type TParsers*{.final.} = object skin*: TParserKind parser*: TParser @@ -54,7 +55,7 @@ proc parseFile(fileIdx: int32): PNode = proc parseAll(p: var TParsers): PNode = case p.skin - of skinStandard: + of skinStandard, skinStrongSpaces: result = parser.parseAll(p.parser) of skinBraces: result = pbraces.parseAll(p.parser) @@ -65,7 +66,7 @@ proc parseAll(p: var TParsers): PNode = proc parseTopLevelStmt(p: var TParsers): PNode = case p.skin - of skinStandard: + of skinStandard, skinStrongSpaces: result = parser.parseTopLevelStmt(p.parser) of skinBraces: result = pbraces.parseTopLevelStmt(p.parser) @@ -170,7 +171,9 @@ proc openParsers(p: var TParsers, fileIdx: int32, inputstream: PLLStream) = else: s = inputstream case p.skin of skinStandard, skinBraces, skinEndX: - parser.openParser(p.parser, fileIdx, s) + parser.openParser(p.parser, fileIdx, s, false) + of skinStrongSpaces: + parser.openParser(p.parser, fileIdx, s, true) -proc closeParsers(p: var TParsers) = +proc closeParsers(p: var TParsers) = parser.closeParser(p.parser) diff --git a/doc/manual.txt b/doc/manual.txt index 98219360e..1c6cf6c1d 100644 --- a/doc/manual.txt +++ b/doc/manual.txt @@ -480,8 +480,8 @@ precedence and associativity; this is useful for meta programming. Associativity ------------- -All binary operators are left-associative, except binary operators whose -relevant char is ``^``. +Binary operators whose relevant character is ``^`` are right-associative, all +other binary operators are left-associative. Precedence ---------- @@ -508,7 +508,7 @@ Precedence level Operators Relevant char 7 ``+ -`` ``+ ~ |`` OP7 6 ``&`` ``&`` OP6 5 ``..`` ``.`` OP5 - 4 ``== <= < >= > != in not_in is isnot not of`` ``= < > !`` OP4 + 4 ``== <= < >= > != in notin is isnot not of`` ``= < > !`` OP4 3 ``and`` OP3 2 ``or xor`` OP2 1 ``@ : ?`` OP1 @@ -516,6 +516,46 @@ Precedence level Operators Relevant char ================ =============================================== ================== =============== +Strong spaces +------------- + +The number of spaces preceeding a non-keyword operator affects precedence +if the experimental parser directive ``#!strongSpaces`` is used. Indentation +is not used to determine the number of spaces. If 2 or more operators have the +same number of preceeding spaces the precedence table applies, so ``1 + 3 * 4`` +is still parsed as ``1 + (3 * 4)``, but ``1+3 * 4`` is parsed as ``(1+3) * 4``: + +.. code-block:: nimrod + #! strongSpaces + if foo+4 * 4 == 8 and b&c | 9 ++ + bar: + echo "" + # is parsed as + if ((foo+4)*4 == 8) and (((b&c) | 9) ++ bar): echo "" + + +Furthermore whether an operator is used a prefix operator is affected by the +number of spaces: + +.. code-block:: nimrod + #! strongSpaces + echo $foo + # is parsed as + echo($foo) + +This also affects whether ``[]``, ``{}``, ``()`` are parsed as constructors +or as accessors: + +.. code-block:: nimrod + #! strongSpaces + echo (1,2) + # is parsed as + echo((1,2)) + + +Grammar +------- + The grammar's start symbol is ``module``. .. include:: grammar.txt diff --git a/todo.txt b/todo.txt index a67ff5172..51f883d1d 100644 --- a/todo.txt +++ b/todo.txt @@ -32,7 +32,6 @@ version 0.9.x - ensure (ref T)(a, b) works as a type conversion and type constructor - optimize 'genericReset'; 'newException' leads to code bloat - stack-less GC -- implement strongSpaces:on - make '--implicitStatic:on' the default - implicit deref for parameter matching diff --git a/web/news.txt b/web/news.txt index 0001cece7..83863cdd9 100644 --- a/web/news.txt +++ b/web/news.txt @@ -82,6 +82,7 @@ News - The *command syntax* is supported in a lot more contexts. - Anonymous iterators are now supported and iterators can capture variables of an outer proc. + - The experimental ``strongSpaces`` parsing mode has been implemented. Tools improvements |