diff options
author | Andreas Rumpf <rumpf_a@web.de> | 2019-06-09 22:22:36 +0200 |
---|---|---|
committer | Andreas Rumpf <rumpf_a@web.de> | 2019-06-09 22:22:50 +0200 |
commit | cb47e49d3bceb616b26bb3dc7bc0ec1a34927013 (patch) | |
tree | b2bc302db622ccaa3c20a1207c303abda0b41337 /compiler | |
parent | 8317de76486d8d7652c2fdeaa444e01bd6f17151 (diff) | |
download | Nim-cb47e49d3bceb616b26bb3dc7bc0ec1a34927013.tar.gz |
nimpretty: keep a seq of tokens instead of a single string to enable better transformations [refactoring]
Diffstat (limited to 'compiler')
-rw-r--r-- | compiler/layouter.nim | 162 |
1 files changed, 90 insertions, 72 deletions
diff --git a/compiler/layouter.nim b/compiler/layouter.nim index 0b88a4aea..7ada05e87 100644 --- a/compiler/layouter.nim +++ b/compiler/layouter.nim @@ -12,6 +12,7 @@ import idents, lexer, lineinfos, llstream, options, msgs, strutils, pathutils from os import changeFileExt +from sequtils import delete const MaxLineLen = 80 @@ -37,8 +38,8 @@ type col, lastLineNumber, lineSpan, indentLevel, indWidth*: int keepIndents*: int doIndentMore*: int - content: string kinds: seq[LayoutToken] + tokens: seq[string] indentStack: seq[int] fixedUntil: int # marks where we must not go in the content altSplitPos: array[SplitKind, int] # alternative split positions @@ -55,21 +56,25 @@ proc openEmitter*(em: var Emitter, cache: IdentCache; em.lastTok = tkInvalid em.inquote = false em.col = 0 - em.content = newStringOfCap(16_000) em.indentStack = newSeqOfCap[int](30) em.indentStack.add 0 em.lastLineNumber = 1 proc closeEmitter*(em: var Emitter) = let outFile = em.config.absOutFile - if fileExists(outFile) and readFile(outFile.string) == em.content: + + var content = newStringOfCap(16_000) + for i in 0..em.tokens.high: + content.add em.tokens[i] + + if fileExists(outFile) and readFile(outFile.string) == content: discard "do nothing, see #9499" return var f = llStreamOpen(outFile, fmWrite) if f == nil: rawMessage(em.config, errGenerated, "cannot open file: " & outFile.string) return - f.llStreamWrite em.content + f.llStreamWrite content llStreamClose(f) proc countNewlines(s: string): int = @@ -85,27 +90,29 @@ proc calcCol(em: var Emitter; s: string) = inc em.col proc wr(em: var Emitter; x: string; lt: LayoutToken) = - em.content.add x + em.tokens.add x em.kinds.add lt inc em.col, x.len + assert em.tokens.len == em.kinds.len -when false: - proc wrNewline(em: var Emitter) = - em.tokens.add "\L" - em.kinds.add ltNewline - inc em.tokensSum, 1 - em.col = 0 +proc wrNewline(em: var Emitter) = + em.tokens.add "\L" + em.kinds.add ltNewline + em.col = 0 - proc wrSpaces(em: var Emitter; spaces: Natural) = +proc wrSpaces(em: var Emitter; spaces: int) = + if spaces > 0: wr(em, strutils.repeat(' ', spaces), ltSpaces) - proc removeSpaces(em: var Emitter) = - while em.kinds.len > 0 and em.kinds[^1] == ltSpaces: - let tokenLen = em.tokens[^1].len - setLen(em.tokens, em.tokens.len-1) - setLen(em.kinds, em.kinds.len-1) - dec em.tokensSum, tokenLen - dec em.col, tokenLen +proc wrSpace(em: var Emitter) = + wr(em, " ", ltSpaces) + +proc removeSpaces(em: var Emitter) = + while em.kinds.len > 0 and em.kinds[^1] == ltSpaces: + let tokenLen = em.tokens[^1].len + setLen(em.tokens, em.tokens.len-1) + setLen(em.kinds, em.kinds.len-1) + dec em.col, tokenLen template goodCol(col): bool = col in 40..MaxLineLen @@ -123,7 +130,7 @@ const template rememberSplit(kind) = if goodCol(em.col): - em.altSplitPos[kind] = em.content.len + em.altSplitPos[kind] = em.tokens.len template moreIndent(em): int = (if em.doIndentMore > 0: em.indWidth*2 else: em.indWidth) @@ -137,22 +144,31 @@ proc softLinebreak(em: var Emitter, lit: string) = # bug #10295, check first if even more indentation would help: let spaces = em.indentLevel+moreIndent(em) if spaces < em.col: - while em.content.len > 0 and em.content[em.content.high] == ' ': - setLen(em.content, em.content.len-1) - wr(em, "\L", ltNewline) + removeSpaces em + wrNewline(em) em.col = 0 - for i in 1..spaces: wr(em, " ", ltSpaces) + wrSpaces em, spaces else: # search backwards for a good split position: for a in mitems(em.altSplitPos): if a > em.fixedUntil: var spaces = 0 - while a+spaces < em.content.len and em.content[a+spaces] == ' ': + while a+spaces < em.kinds.len and em.kinds[a+spaces] == ltSpaces: inc spaces - if spaces > 0: delete(em.content, a, a+spaces-1) - em.col = em.content.len - a - let ws = "\L" & repeat(' ', em.indentLevel+moreIndent(em)) - em.content.insert(ws, a) + if spaces > 0: + delete(em.tokens, a, a+spaces-1) + delete(em.kinds, a, a+spaces-1) + em.kinds.insert(ltNewline, a) + em.tokens.insert("\L", a) + em.kinds.insert(ltSpaces, a+1) + em.tokens.insert(repeat(' ', em.indentLevel+moreIndent(em)), a+1) + # recompute em.col: + var i = em.kinds.len-1 + em.col = 0 + while i >= 0 and em.kinds[i] != ltNewline: + inc em.col, em.tokens[i].len + dec i + # mark position as "already split here" a = -1 break @@ -168,7 +184,7 @@ proc emitMultilineComment(em: var Emitter, lit: string, col: int) = if i == 0: discard elif stripped.len == 0: - wr em, "\L", ltNewline + wrNewline em else: if a > lastIndent: b += em.indWidth @@ -176,17 +192,17 @@ proc emitMultilineComment(em: var Emitter, lit: string, col: int) = elif a < lastIndent: b -= em.indWidth lastIndent = a - wr em, "\L", ltNewline - for i in 1 .. col + b: wr(em, " ", ltSpaces) + wrNewline em + wrSpaces em, col + b wr em, stripped, ltComment inc i proc emitTok*(em: var Emitter; L: TLexer; tok: TToken) = template endsInWhite(em): bool = - em.content.len == 0 or em.content[em.content.high] in {' ', '\L'} + em.kinds.len == 0 or em.kinds[em.kinds.high] in {ltSpaces, ltNewline} template endsInAlpha(em): bool = - em.content.len > 0 and em.content[em.content.high] in SymChars+{'_'} + em.tokens.len > 0 and em.tokens[em.tokens.high][^1] in SymChars+{'_'} template wasExportMarker(em): bool = em.kinds.len > 0 and em.kinds[^1] == ltExportMarker @@ -197,9 +213,9 @@ proc emitTok*(em: var Emitter; L: TLexer; tok: TToken) = em.lineSpan = countNewlines(lit) if em.lineSpan > 0: calcCol(em, lit) if not endsInWhite(em): - wr(em, " ", ltSpaces) + wrSpace em if em.lineSpan == 0 and max(em.col, LineCommentColumn) + lit.len <= MaxLineLen: - for i in 1 .. LineCommentColumn - em.col: wr(em, " ", ltSpaces) + wrSpaces em, LineCommentColumn - em.col if em.lineSpan == 0: wr em, lit, ltComment else: @@ -209,13 +225,12 @@ proc emitTok*(em: var Emitter; L: TLexer; tok: TToken) = case tok.literal of "#!nimpretty off": inc em.keepIndents - wr(em, "\L", ltNewline) + wrNewline em em.lastLineNumber = tok.line + 1 of "#!nimpretty on": dec em.keepIndents em.lastLineNumber = tok.line - wr(em, "\L", ltNewline) - #for i in 1 .. tok.indent: wr " " + wrNewline em wr em, tok.literal, ltComment em.col = 0 em.lineSpan = 0 @@ -226,7 +241,7 @@ proc emitTok*(em: var Emitter; L: TLexer; tok: TToken) = # we have an inline comment so handle it before the indentation token: emitComment(em, tok) preventComment = true - em.fixedUntil = em.content.high + em.fixedUntil = em.tokens.high elif tok.indent >= 0: if em.keepIndents > 0: @@ -253,24 +268,21 @@ proc emitTok*(em: var Emitter; L: TLexer; tok: TToken) = is not touched. ]# # remove trailing whitespace: - while em.content.len > 0 and em.content[em.content.high] == ' ': - setLen(em.content, em.content.len-1) - wr(em, "\L", ltNewline) - for i in 2..tok.line - em.lastLineNumber: wr(em, "\L", ltNewline) - em.col = 0 - for i in 1..em.indentLevel: - wr(em, " ", ltSpaces) - em.fixedUntil = em.content.high + removeSpaces em + wrNewline em + for i in 2..tok.line - em.lastLineNumber: wrNewline(em) + wrSpaces em, em.indentLevel + em.fixedUntil = em.tokens.high var lastTokWasTerse = false case tok.tokType of tokKeywordLow..tokKeywordHigh: if endsInAlpha(em): - wr(em, " ", ltSpaces) + wrSpace em elif not em.inquote and not endsInWhite(em) and em.lastTok notin openPars and not em.lastTokWasTerse: #and tok.tokType in oprSet - wr(em, " ", ltSpaces) + wrSpace em if not em.inquote: wr(em, TokTypeToStr[tok.tokType], ltKeyword) @@ -280,7 +292,7 @@ proc emitTok*(em: var Emitter; L: TLexer; tok: TToken) = of tkOr: rememberSplit(splitOr) of tkIn, tkNotin: rememberSplit(splitIn) - wr(em, " ", ltSpaces) + wrSpace em else: discard else: # keywords in backticks are not normalized: @@ -288,15 +300,15 @@ proc emitTok*(em: var Emitter; L: TLexer; tok: TToken) = of tkColon: wr(em, TokTypeToStr[tok.tokType], ltOther) - wr(em, " ", ltSpaces) + wrSpace em of tkSemicolon, tkComma: wr(em, TokTypeToStr[tok.tokType], ltOther) rememberSplit(splitComma) - wr(em, " ", ltSpaces) + wrSpace em of tkParDotLe, tkParLe, tkBracketDotLe, tkBracketLe, tkCurlyLe, tkCurlyDotLe, tkBracketLeColon: if tok.strongSpaceA > 0 and not em.endsInWhite and not em.wasExportMarker: - wr(em, " ", ltSpaces) + wrSpace em wr(em, TokTypeToStr[tok.tokType], ltOther) rememberSplit(splitParLe) of tkParRi, @@ -310,9 +322,9 @@ proc emitTok*(em: var Emitter; L: TLexer; tok: TToken) = lastTokWasTerse = true wr(em, TokTypeToStr[tok.tokType], ltOther) of tkEquals: - if not em.inquote and not em.endsInWhite: wr(em, " ", ltSpaces) + if not em.inquote and not em.endsInWhite: wrSpace(em) wr(em, TokTypeToStr[tok.tokType], ltOther) - if not em.inquote: wr(em, " ", ltSpaces) + if not em.inquote: wrSpace(em) of tkOpr, tkDotDot: if ((tok.strongSpaceA == 0 and tok.strongSpaceB == 0) or em.inquote) and tok.ident.s notin ["<", ">", "<=", ">=", "==", "!="]: @@ -321,16 +333,16 @@ proc emitTok*(em: var Emitter; L: TLexer; tok: TToken) = # if not surrounded by whitespace, don't produce any whitespace either: wr(em, tok.ident.s, ltOpr) else: - if not em.endsInWhite: wr(em, " ", ltSpaces) + if not em.endsInWhite: wrSpace(em) wr(em, tok.ident.s, ltOpr) template isUnary(tok): bool = tok.strongSpaceB == 0 and tok.strongSpaceA > 0 if not isUnary(tok): rememberSplit(splitBinary) - wr(em, " ", ltSpaces) + wrSpace(em) of tkAccent: - if not em.inquote and endsInAlpha(em): wr(em, " ", ltSpaces) + if not em.inquote and endsInAlpha(em): wrSpace(em) wr(em, TokTypeToStr[tok.tokType], ltOther) em.inquote = not em.inquote of tkComment: @@ -339,7 +351,7 @@ proc emitTok*(em: var Emitter; L: TLexer; tok: TToken) = of tkIntLit..tkStrLit, tkRStrLit, tkTripleStrLit, tkGStrLit, tkGTripleStrLit, tkCharLit: let lit = fileSection(em.config, em.fid, tok.offsetA, tok.offsetB) softLinebreak(em, lit) - if endsInAlpha(em) and tok.tokType notin {tkGStrLit, tkGTripleStrLit}: wr(em, " ", ltSpaces) + if endsInAlpha(em) and tok.tokType notin {tkGStrLit, tkGTripleStrLit}: wrSpace(em) em.lineSpan = countNewlines(lit) if em.lineSpan > 0: calcCol(em, lit) wr em, lit, ltLit @@ -347,7 +359,7 @@ proc emitTok*(em: var Emitter; L: TLexer; tok: TToken) = else: let lit = if tok.ident != nil: tok.ident.s else: tok.literal softLinebreak(em, lit) - if endsInAlpha(em): wr(em, " ", ltSpaces) + if endsInAlpha(em): wrSpace(em) wr em, lit, ltIdent em.lastTok = tok.tokType @@ -355,21 +367,27 @@ proc emitTok*(em: var Emitter; L: TLexer; tok: TToken) = em.lastLineNumber = tok.line + em.lineSpan em.lineSpan = 0 +proc endsWith(em: Emitter; k: varargs[string]): bool = + if em.tokens.len < k.len: return false + for i in 0..high(k): + if em.tokens[em.tokens.len - k.len + i] != k[i]: return false + return true + proc starWasExportMarker*(em: var Emitter) = - em.kinds[^1] = ltExportMarker - if em.content.endsWith(" * "): - setLen(em.content, em.content.len-3) - em.content.add("*") + if em.endsWith(" ", "*", " "): + setLen(em.tokens, em.tokens.len-3) + setLen(em.kinds, em.kinds.len-3) + em.tokens.add("*") + em.kinds.add ltExportMarker dec em.col, 2 proc commaWasSemicolon*(em: var Emitter) = if em.semicolons == detectSemicolonKind: - em.semicolons = if em.content.endsWith(", "): dontTouch else: useSemicolon - if em.semicolons == useSemicolon and em.content.endsWith(", "): - setLen(em.content, em.content.len-2) - em.content.add("; ") + em.semicolons = if em.endsWith(",", " "): dontTouch else: useSemicolon + if em.semicolons == useSemicolon and em.endsWith(",", " "): + em.tokens[em.tokens.len-2] = ";" proc curlyRiWasPragma*(em: var Emitter) = - if em.content.endsWith("}"): - setLen(em.content, em.content.len-1) - em.content.add(".}") + if em.endsWith("}"): + em.tokens[em.tokens.len-1] = ".}" + inc em.col |