diff options
Diffstat (limited to 'compiler/layouter.nim')
-rw-r--r-- | compiler/layouter.nim | 609 |
1 files changed, 609 insertions, 0 deletions
#
#
#           The Nim Compiler
#        (c) Copyright 2018 Andreas Rumpf
#
#    See the file "copying.txt", included in this
#    distribution, for details about the copyright.
#

## Layouter for nimpretty.
##
## The `Emitter` records a flat list of layout tokens (`tokens` holds the
## text, `kinds` the classification of each entry) while the source is
## lexed; `renderTokens` later turns that list into the output string.

import idents, lexer, ast, lineinfos, llstream, options, msgs, strutils, pathutils

const
  MinLineLen = 15

type
  SplitKind = enum
    splitComma, splitParLe, splitAnd, splitOr, splitIn, splitBinary

  SemicolonKind = enum
    detectSemicolonKind, useSemicolon, dontTouch

  LayoutToken* = enum
    ltSpaces,
    ltCrucialNewline, ## a semantically crucial newline (indentation!)
    ltSplittingNewline, ## newline used for splitting up long
                        ## expressions (like after a comma or a binary operator)
    ltTab,
    ltOptionalNewline, ## optional newline introduced by nimpretty
    ltComment, ltLit, ltKeyword, ltExportMarker, ltIdent,
    ltOther, ltOpr, ltSomeParLe, ltSomeParRi,
    ltBeginSection, ltEndSection

  Emitter* = object
    config: ConfigRef
    fid: FileIndex
    lastTok: TokType
    inquote, lastTokWasTerse: bool
    semicolons: SemicolonKind
    col, lastLineNumber, lineSpan, indentLevel, indWidth*, inSection: int
    keepIndents*: int
    doIndentMore*: int
    kinds*: seq[LayoutToken]
    tokens*: seq[string]
    indentStack: seq[int]
    fixedUntil: int # marks where we must not go in the content
    altSplitPos: array[SplitKind, int] # alternative split positions
    maxLineLen*: int

proc openEmitter*(em: var Emitter, cache: IdentCache;
                  config: ConfigRef, fileIdx: FileIndex) =
  ## Resets the emitter state for the file `fileIdx`.
  ##
  ## When `em.indWidth` is still 0 it is taken from `getIndentWidth`; if that
  ## also yields 0 the width falls back to 2.
  let fullPath = AbsoluteFile config.toFullPath(fileIdx)
  if em.indWidth == 0:
    em.indWidth = getIndentWidth(fileIdx, llStreamOpen(fullPath, fmRead),
                                cache, config)
    if em.indWidth == 0: em.indWidth = 2
  em.config = config
  em.fid = fileIdx
  em.lastTok = tkInvalid
  em.inquote = false
  em.col = 0
  em.indentStack = newSeqOfCap[int](30)
  em.indentStack.add 0
  em.lastLineNumber = 1

proc computeMax(em: Emitter; pos: int): int =
  ## Scans the lines starting at token `pos` up to the next `ltEndSection`
  ## and returns the largest "text before the `ltTab`" width among the lines
  ## that contain an `ltTab` and are not longer than `em.maxLineLen`.
  var p = pos
  var extraSpace = 0
  result = 0
  while p < em.tokens.len and em.kinds[p] != ltEndSection:
    var lhs = 0
    var lineLen = 0
    var foundTab = false
    while p < em.tokens.len and em.kinds[p] != ltEndSection:
      if em.kinds[p] in {ltCrucialNewline, ltSplittingNewline}:
        if foundTab and lineLen <= em.maxLineLen:
          result = max(result, lhs + extraSpace)
        inc p
        break
      if em.kinds[p] == ltTab:
        # `p > 0` guards the look-behind when the tab is the very first token.
        extraSpace = if p > 0 and em.kinds[p-1] == ltSpaces: 0 else: 1
        foundTab = true
      else:
        if not foundTab:
          inc lhs, em.tokens[p].len
      inc lineLen, em.tokens[p].len
      inc p

proc computeRhs(em: Emitter; pos: int): int =
  ## Sums the token lengths from `pos` up to (excluding) the next crucial or
  ## splitting newline.
  var p = pos
  result = 0
  while p < em.tokens.len and em.kinds[p] notin {ltCrucialNewline, ltSplittingNewline}:
    inc result, em.tokens[p].len
    inc p

proc isLongEnough(lineLen, startPos, endPos: int): bool =
  ## True when the line has more than `MinLineLen` characters and spans more
  ## than four token positions.
  result = lineLen > MinLineLen and endPos > startPos + 4

proc findNewline(em: Emitter; p, lineLen: var int) =
  ## Advances `p` to the next crucial/splitting newline (or the end of the
  ## token list) and adds the lengths of the skipped tokens to `lineLen`.
  while p < em.tokens.len and em.kinds[p] notin {ltCrucialNewline, ltSplittingNewline}:
    inc lineLen, em.tokens[p].len
    inc p

proc countNewlines(s: string): int =
  ## Number of `'\L'` characters in `s`.
  result = 0
  for i in 0..<s.len:
    if s[i] == '\L': inc result

proc calcCol(em: var Emitter; s: string) =
  ## Sets `em.col` to the number of characters following the last `'\L'` in
  ## `s` (the whole length if `s` contains no newline).
  var i = s.len-1
  em.col = 0
  while i >= 0 and s[i] != '\L':
    dec i
    inc em.col

proc optionalIsGood(em: var Emitter; pos, currentLen: int): bool =
  ## Decides whether the optional newline stored at token `pos` should
  ## actually be used. May rewrite following tokens when two lines with the
  ## same indentation are concatenated.
  let ourIndent = em.tokens[pos].len
  var p = pos+1
  var lineLen = 0
  em.findNewline(p, lineLen)
  if p == pos+1: # optionalNewline followed by another newline
    result = false
  elif em.kinds[p-1] == ltComment and currentLen+lineLen < em.maxLineLen+MinLineLen:
    result = false
  elif p+1 < em.tokens.len and em.kinds[p+1] == ltSpaces and
      em.kinds[p-1] == ltOptionalNewline:
    if em.tokens[p+1].len == ourIndent:
      # concatenate lines with the same indentation
      var nlPos = p
      var lineLenTotal = lineLen
      inc p
      em.findNewline(p, lineLenTotal)
      if isLongEnough(lineLenTotal, nlPos, p):
        em.kinds[nlPos] = ltOptionalNewline
        if em.kinds[nlPos+1] == ltSpaces:
          # inhibit extra spaces when concatenating two lines
          em.tokens[nlPos+1] = if em.tokens[nlPos-2] == ",": " " else: ""
        result = true
    elif em.tokens[p+1].len < ourIndent:
      result = isLongEnough(lineLen, pos, p)
  elif em.kinds[pos+1] in {ltOther, ltSomeParLe, ltSomeParRi}: # note: pos+1, not p+1
    result = false
  else:
    result = isLongEnough(lineLen, pos, p)

proc lenOfNextTokens(em: Emitter; pos: int): int =
  ## Sums the lengths of the tokens after `pos` up to (excluding) the next
  ## newline token of any kind.
  result = 0
  for i in 1..<em.tokens.len-pos:
    if em.kinds[pos+i] in {ltCrucialNewline, ltSplittingNewline, ltOptionalNewline}: break
    inc result, em.tokens[pos+i].len

proc guidingInd(em: Emitter; pos: int): int =
  ## Walks back from `pos` to the closest `ltSomeParLe`, then forward until
  ## an `ltSomeParRi`, and returns the width of the first `ltSpaces` token
  ## that directly follows an `ltSplittingNewline`. Returns -1 when there is
  ## no such token.
  var i = pos - 1
  while i >= 0 and em.kinds[i] != ltSomeParLe:
    dec i
  # No opening parenthesis before `pos`: nothing to align with. Without this
  # check the loop below would index `em.kinds[-1]`.
  if i < 0: return -1
  while i+1 <= em.kinds.high and em.kinds[i] != ltSomeParRi:
    if em.kinds[i] == ltSplittingNewline and em.kinds[i+1] == ltSpaces:
      return em.tokens[i+1].len
    inc i
  result = -1

proc renderTokens*(em: var Emitter): string =
  ## Render Emitter tokens to a string of code
  template defaultCase() =
    content.add em.tokens[i]
    inc lineLen, em.tokens[i].len
  var content = newStringOfCap(16_000)
  var maxLhs = 0
  var lineLen = 0
  var lineBegin = 0
  var openPars = 0
  var i = 0
  while i <= em.tokens.high:
    when defined(debug):
      echo (token: em.tokens[i], kind: em.kinds[i])
    case em.kinds[i]
    of ltBeginSection:
      maxLhs = computeMax(em, lineBegin)
    of ltEndSection:
      maxLhs = 0
      lineBegin = i+1
    of ltTab:
      if i >= 2 and em.kinds[i-2] in {ltCrucialNewline, ltSplittingNewline} and
          em.kinds[i-1] in {ltCrucialNewline, ltSplittingNewline, ltSpaces}:
        # a previous section has ended
        maxLhs = 0

      if maxLhs == 0:
        # `i == 0` guards the look-behind for a tab that is the first token.
        if i == 0 or em.kinds[i-1] != ltSpaces:
          content.add em.tokens[i]
          inc lineLen, em.tokens[i].len
      else:
        # pick the shorter indentation token:
        var spaces = maxLhs - lineLen
        if spaces < em.tokens[i].len or computeRhs(em, i+1)+maxLhs <= em.maxLineLen+MinLineLen:
          if spaces <= 0 and content[^1] notin {' ', '\L'}: spaces = 1
          for j in 1..spaces: content.add ' '
          inc lineLen, spaces
        else:
          content.add em.tokens[i]
          inc lineLen, em.tokens[i].len
    of ltCrucialNewline, ltSplittingNewline:
      content.add em.tokens[i]
      lineLen = 0
      lineBegin = i+1
    of ltOptionalNewline:
      let totalLineLen = lineLen + lenOfNextTokens(em, i)
      if totalLineLen > em.maxLineLen and optionalIsGood(em, i, lineLen):
        if i-1 >= 0 and em.kinds[i-1] == ltSpaces:
          # drop the spaces that were already emitted before the line break
          let spaces = em.tokens[i-1].len
          content.setLen(content.len - spaces)
        content.add "\L"
        let guide = if openPars > 0: guidingInd(em, i) else: -1
        if guide >= 0:
          content.add repeat(' ', guide)
          lineLen = guide
        else:
          content.add em.tokens[i]
          lineLen = em.tokens[i].len
        lineBegin = i+1
        if i+1 < em.kinds.len and em.kinds[i+1] == ltSpaces:
          # inhibit extra spaces at the start of a new line
          inc i
    of ltLit:
      let lineSpan = countNewlines(em.tokens[i])
      if lineSpan > 0:
        # multi-line literal: the line length restarts after its last newline
        em.calcCol(em.tokens[i])
        lineLen = em.col
      else:
        inc lineLen, em.tokens[i].len
      content.add em.tokens[i]
    of ltSomeParLe:
      inc openPars
      defaultCase()
    of ltSomeParRi:
      doAssert openPars > 0
      dec openPars
      defaultCase()
    else:
      defaultCase()
    inc i

  return content

type
  FinalCheck = proc (content: string; origAst: PNode): bool {.nimcall.}

proc writeOut*(em: Emitter; content: string; origAst: PNode; check: FinalCheck) =
  ## Write to disk
  ##
  ## Nothing is written when the output file already holds exactly `content`
  ## or when `check(content, origAst)` returns false.
  let outFile = em.config.absOutFile
  if fileExists(outFile) and readFile(outFile.string) == content:
    discard "do nothing, see #9499"
    return

  if check(content, origAst):
    var f = llStreamOpen(outFile, fmWrite)
    if f == nil:
      rawMessage(em.config, errGenerated, "cannot open file: " & outFile.string)
      return
    f.llStreamWrite content
    llStreamClose(f)

proc closeEmitter*(em: var Emitter; origAst: PNode; check: FinalCheck) =
  ## Renders emitter tokens and write to a file
  let content = renderTokens(em)
  em.writeOut(content, origAst, check)

proc wr(em: var Emitter; x: string; lt: LayoutToken) =
  ## Appends the token text `x` with kind `lt` and advances `em.col` by the
  ## length of `x`.
  em.tokens.add x
  em.kinds.add lt
  inc em.col, x.len
  assert em.tokens.len == em.kinds.len

proc wrNewline(em: var Emitter; kind = ltCrucialNewline) =
  ## Appends a `"\L"` token of the given newline `kind` and resets `em.col`.
  em.tokens.add "\L"
  em.kinds.add kind
  em.col = 0

proc newlineWasSplitting*(em: var Emitter) =
  ## Reclassifies the third-from-last token as a splitting newline if it was
  ## recorded as a crucial newline.
  if em.kinds.len >= 3 and em.kinds[^3] == ltCrucialNewline:
    em.kinds[^3] = ltSplittingNewline

#[
Splitting newlines can occur:
- after commas, semicolon, '[', '('.
- after binary operators, '='.
- after ':' type

We only need parser support for the "after type" case.
]#

proc wrSpaces(em: var Emitter; spaces: int) =
  ## Appends an `ltSpaces` token of `spaces` blanks; does nothing for
  ## `spaces <= 0`.
  if spaces > 0:
    wr(em, strutils.repeat(' ', spaces), ltSpaces)

proc wrSpace(em: var Emitter) =
  ## Appends a single-blank `ltSpaces` token.
  wr(em, " ", ltSpaces)

proc wrTab(em: var Emitter) =
  ## Appends a single-blank `ltTab` token.
  wr(em, " ", ltTab)

proc beginSection*(em: var Emitter) =
  ## Inserts an empty `ltBeginSection` marker two tokens before the end of
  ## the token list (clamped to position 0) and increments `em.inSection`.
  let pos = max(0, em.tokens.len-2)
  em.tokens.insert "", pos
  em.kinds.insert ltBeginSection, pos
  inc em.inSection

#wr(em, "", ltBeginSection)
proc endSection*(em: var Emitter) =
  ## Inserts an empty `ltEndSection` marker two tokens before the end of the
  ## token list and decrements `em.inSection`.
  # Clamp like `beginSection` does so that fewer than two recorded tokens do
  # not produce a negative insert position.
  let pos = max(0, em.tokens.len-2)
  em.tokens.insert "", pos
  em.kinds.insert ltEndSection, pos
  dec em.inSection

#wr(em, "", ltEndSection)

proc removeSpaces(em: var Emitter) =
  ## Pops all trailing `ltSpaces` tokens and subtracts their lengths from
  ## `em.col`.
  while em.kinds.len > 0 and em.kinds[^1] == ltSpaces:
    let tokenLen = em.tokens[^1].len
    setLen(em.tokens, em.tokens.len-1)
    setLen(em.kinds, em.kinds.len-1)
    dec em.col, tokenLen


const
  openPars = {tkParLe, tkParDotLe,
              tkBracketLe, tkBracketDotLe, tkBracketLeColon,
              tkCurlyDotLe, tkCurlyLe}
  closedPars = {tkParRi, tkParDotRi,
              tkBracketRi, tkBracketDotRi,
              tkCurlyDotRi, tkCurlyRi}

  splitters = openPars + {tkComma, tkSemiColon} # do not add 'tkColon' here!
  oprSet = {tkOpr, tkDiv, tkMod, tkShl, tkShr, tkIn, tkNotin, tkIs,
            tkIsnot, tkNot, tkOf, tkAs, tkFrom, tkDotDot, tkAnd, tkOr, tkXor}

# True once the column has reached half of the maximum line length.
# Expects an `em` symbol to be in scope at the call site.
template goodCol(col): bool = col >= em.maxLineLen div 2

# Indentation step for continuation lines: doubled while `doIndentMore` is set.
template moreIndent(em): int =
  if em.doIndentMore > 0: em.indWidth*2 else: em.indWidth

# Records an optional newline (carrying the continuation indentation as its
# text) at the current position. Expects an `em` symbol at the call site.
template rememberSplit(kind) =
  if goodCol(em.col) and not em.inquote:
    let spaces = em.indentLevel+moreIndent(em)
    if spaces < em.col and spaces > 0:
      wr(em, strutils.repeat(' ', spaces), ltOptionalNewline)
  #em.altSplitPos[kind] = em.tokens.len

proc emitMultilineComment(em: var Emitter, lit: string, col: int; dontIndent: bool) =
  ## Emits the comment text `lit` line by line. Unless `dontIndent` is set
  ## (or the first line ends in `\` or `[`), continuation lines are
  ## re-indented relative to `col`.
  # re-align every line in the multi-line comment:
  var i = 0
  var lastIndent = if em.keepIndents > 0: em.indentLevel else: em.indentStack[^1]
  var b = 0
  var dontIndent = dontIndent
  var hasEmptyLine = false
  for commentLine in splitLines(lit):
    if i == 0 and (commentLine.endsWith("\\") or commentLine.endsWith("[")):
      dontIndent = true
      wr em, commentLine, ltComment
    elif dontIndent:
      if i > 0: wrNewline em
      wr em, commentLine, ltComment
    else:
      let stripped = commentLine.strip()
      if i == 0:
        if em.kinds.len > 0 and em.kinds[^1] != ltTab:
          wr(em, "", ltTab)
      elif stripped.len == 0:
        wrNewline em
        hasEmptyLine = true
      else:
        # `a` is the number of leading blanks of this comment line
        var a = 0
        while a < commentLine.len and commentLine[a] == ' ': inc a

        if a > lastIndent:
          b += em.indWidth
          lastIndent = a
        elif a < lastIndent:
          b -= em.indWidth
          lastIndent = a
        wrNewline em
        if not hasEmptyLine or col + b < 15:
          if col + b > 0:
            wr(em, repeat(' ', col+b), ltTab)
          else:
            wr(em, "", ltTab)
        else:
          wr(em, repeat(' ', a), ltSpaces)
      wr em, stripped, ltComment
    inc i

proc lastChar(s: string): char =
  ## Last character of `s`, or `'\0'` for the empty string.
  result = if s.len > 0: s[s.high] else: '\0'

proc endsInWhite(em: Emitter): bool =
  ## True if the last token (ignoring section markers) is spaces, a crucial
  ## or splitting newline or a tab, or if there is no such token.
  var i = em.tokens.len-1
  while i >= 0 and em.kinds[i] in {ltBeginSection, ltEndSection}: dec(i)
  result = if i >= 0: em.kinds[i] in {ltSpaces, ltCrucialNewline, ltSplittingNewline, ltTab} else: true

proc endsInNewline(em: Emitter): bool =
  ## True if the last token (ignoring section markers and spaces) is a
  ## crucial or splitting newline or a tab, or if there is no such token.
  var i = em.tokens.len-1
  while i >= 0 and em.kinds[i] in {ltBeginSection, ltEndSection, ltSpaces}: dec(i)
  result = if i >= 0: em.kinds[i] in {ltCrucialNewline, ltSplittingNewline, ltTab} else: true

proc endsInAlpha(em: Emitter): bool =
  ## True if the last token (ignoring section markers) ends in a character
  ## from `SymChars` or `'_'`.
  var i = em.tokens.len-1
  while i >= 0 and em.kinds[i] in {ltBeginSection, ltEndSection}: dec(i)
  result = if i >= 0: em.tokens[i].lastChar in SymChars+{'_'} else: false

proc emitComment(em: var Emitter; tok: Token; dontIndent: bool) =
  ## Emits the comment belonging to `tok`, taking its text from the file
  ## section between `tok.commentOffsetA` and `tok.commentOffsetB`.
  var col = em.col
  let lit = strip fileSection(em.config, em.fid, tok.commentOffsetA, tok.commentOffsetB)
  em.lineSpan = countNewlines(lit)
  if em.lineSpan > 0: calcCol(em, lit)
  if em.lineSpan == 0:
    if not endsInNewline(em):
      wrTab em
    wr em, lit, ltComment
  else:
    if not endsInWhite(em):
      wrTab em
      inc col
    emitMultilineComment(em, lit, col, dontIndent)

proc emitTok*(em: var Emitter; L: Lexer; tok: Token) =
  ## Records the layout tokens for the lexer token `tok`: first the newline
  ## and indentation (or an inline comment) that precede it, then the token
  ## text itself together with the surrounding spaces.
  template wasExportMarker(em): bool =
    em.kinds.len > 0 and em.kinds[^1] == ltExportMarker

  if tok.tokType == tkComment and tok.literal.startsWith("#!nimpretty"):
    case tok.literal
    of "#!nimpretty off":
      inc em.keepIndents
      wrNewline em
      em.lastLineNumber = tok.line + 1
    of "#!nimpretty on":
      dec em.keepIndents
      em.lastLineNumber = tok.line
    wrNewline em
    wr em, tok.literal, ltComment
    em.col = 0
    em.lineSpan = 0
    return

  var preventComment = false
  if tok.tokType == tkComment and tok.line == em.lastLineNumber:
    # we have an inline comment so handle it before the indentation token:
    emitComment(em, tok, dontIndent = (em.inSection == 0))
    preventComment = true
    em.fixedUntil = em.tokens.high

  elif tok.indent >= 0:
    var newlineKind = ltCrucialNewline
    if em.keepIndents > 0:
      em.indentLevel = tok.indent
    elif (em.lastTok in (splitters + oprSet) and
        tok.tokType notin (closedPars - {tkBracketDotRi})):
      if tok.tokType in openPars and tok.indent > em.indentStack[^1]:
        while em.indentStack[^1] < tok.indent:
          em.indentStack.add(em.indentStack[^1] + em.indWidth)
      while em.indentStack[^1] > tok.indent:
        discard em.indentStack.pop()

      # aka: we are in an expression context:
      let alignment = max(tok.indent - em.indentStack[^1], 0)
      em.indentLevel = alignment + em.indentStack.high * em.indWidth
      newlineKind = ltSplittingNewline
    else:
      if tok.indent > em.indentStack[^1]:
        em.indentStack.add tok.indent
      else:
        # dedent?
        while em.indentStack.len > 1 and em.indentStack[^1] > tok.indent:
          discard em.indentStack.pop()
      em.indentLevel = em.indentStack.high * em.indWidth
    #[ we only correct the indentation if it is not in an expression context,
       so that code like

        const splitters = {tkComma, tkSemicolon, tkParLe, tkParDotLe,
                           tkBracketLe, tkBracketLeColon, tkCurlyDotLe,
                           tkCurlyLe}

       is not touched.
    ]#
    # remove trailing whitespace:
    removeSpaces em
    wrNewline em, newlineKind
    # one extra newline token per additional line between the previous token
    # and this one:
    for i in 2..tok.line - em.lastLineNumber: wrNewline(em)
    wrSpaces em, em.indentLevel
    em.fixedUntil = em.tokens.high

  var lastTokWasTerse = false
  case tok.tokType
  of tokKeywordLow..tokKeywordHigh:
    if endsInAlpha(em):
      wrSpace em
    elif not em.inquote and not endsInWhite(em) and
        em.lastTok notin (openPars+{tkOpr, tkDotDot}) and not em.lastTokWasTerse:
      #and tok.tokType in oprSet
      wrSpace em

    if not em.inquote:
      wr(em, $tok.tokType, ltKeyword)
      if tok.tokType in {tkAnd, tkOr, tkIn, tkNotin}:
        rememberSplit(splitIn)
        wrSpace em
    else:
      # keywords in backticks are not normalized:
      wr(em, tok.ident.s, ltIdent)

  of tkColon:
    wr(em, $tok.tokType, ltOther)
    wrSpace em
  of tkSemiColon, tkComma:
    wr(em, $tok.tokType, ltOther)
    rememberSplit(splitComma)
    wrSpace em
  of openPars:
    if tsLeading in tok.spacing and not em.endsInWhite and
        (not em.wasExportMarker or tok.tokType == tkCurlyDotLe):
      wrSpace em
    wr(em, $tok.tokType, ltSomeParLe)
    if tok.tokType != tkCurlyDotLe:
      rememberSplit(splitParLe)
  of closedPars:
    wr(em, $tok.tokType, ltSomeParRi)
  of tkColonColon:
    wr(em, $tok.tokType, ltOther)
  of tkDot:
    lastTokWasTerse = true
    wr(em, $tok.tokType, ltOther)
  of tkEquals:
    if not em.inquote and not em.endsInWhite: wrSpace(em)
    wr(em, $tok.tokType, ltOther)
    if not em.inquote: wrSpace(em)
  of tkOpr, tkDotDot:
    if em.inquote or (tok.spacing == {} and
        tok.ident.s notin ["<", ">", "<=", ">=", "==", "!="]):
      # bug #9504: remember to not spacify a keyword:
      lastTokWasTerse = true
      # if not surrounded by whitespace, don't produce any whitespace either:
      wr(em, tok.ident.s, ltOpr)
    else:
      if not em.endsInWhite: wrSpace(em)
      wr(em, tok.ident.s, ltOpr)
      template isUnary(tok): bool =
        tok.spacing == {tsLeading}

      if not isUnary(tok):
        rememberSplit(splitBinary)
        wrSpace(em)
  of tkAccent:
    if not em.inquote and endsInAlpha(em): wrSpace(em)
    wr(em, $tok.tokType, ltOther)
    em.inquote = not em.inquote
  of tkComment:
    if not preventComment:
      emitComment(em, tok, dontIndent = false)
  of tkIntLit..tkStrLit, tkRStrLit, tkTripleStrLit, tkGStrLit, tkGTripleStrLit, tkCharLit:
    if not em.inquote:
      # take the literal verbatim from the source file
      let lit = fileSection(em.config, em.fid, tok.offsetA, tok.offsetB)
      if endsInAlpha(em) and tok.tokType notin {tkGStrLit, tkGTripleStrLit}: wrSpace(em)
      em.lineSpan = countNewlines(lit)
      if em.lineSpan > 0: calcCol(em, lit)
      wr em, lit, ltLit
    else:
      if endsInAlpha(em): wrSpace(em)
      wr em, tok.literal, ltLit
  of tkEof: discard
  else:
    let lit = if tok.ident != nil: tok.ident.s else: tok.literal
    if endsInAlpha(em): wrSpace(em)
    wr em, lit, ltIdent

  em.lastTok = tok.tokType
  em.lastTokWasTerse = lastTokWasTerse
  em.lastLineNumber = tok.line + em.lineSpan
  em.lineSpan = 0

proc endsWith(em: Emitter; k: varargs[string]): bool =
  ## True if the last `k.len` token texts equal `k`, in order.
  if em.tokens.len < k.len: return false
  for i in 0..high(k):
    if em.tokens[em.tokens.len - k.len + i] != k[i]: return false
  return true

proc rfind(em: Emitter, t: string): int =
  ## Searches the last five tokens (fewer if not that many are recorded) for
  ## the text `t`. Returns its 1-based distance from the end, suitable for
  ## `em.tokens[^result]`, or 0 if it was not found.
  result = 0
  # Bounding the scan by `tokens.len` keeps `^i` in range for short lists.
  for i in 1..min(5, em.tokens.len):
    if em.tokens[^i] == t:
      return i

proc starWasExportMarker*(em: var Emitter) =
  ## If the last three tokens are `" "`, `"*"`, `" "`, replaces them with a
  ## single `"*"` token of kind `ltExportMarker` and moves `em.col` back by
  ## the two removed blanks.
  if em.endsWith(" ", "*", " "):
    setLen(em.tokens, em.tokens.len-3)
    setLen(em.kinds, em.kinds.len-3)
    em.tokens.add("*")
    em.kinds.add ltExportMarker
    dec em.col, 2

proc commaWasSemicolon*(em: var Emitter) =
  ## While the semicolon style is still undetected, picks `useSemicolon` if a
  ## `";"` is among the last five tokens and `dontTouch` otherwise. With
  ## `useSemicolon`, the closest `","` among the last five tokens is
  ## rewritten to `";"`.
  if em.semicolons == detectSemicolonKind:
    em.semicolons = if em.rfind(";") > 0: useSemicolon else: dontTouch
  if em.semicolons == useSemicolon:
    let commaPos = em.rfind(",")
    if commaPos > 0:
      em.tokens[^commaPos] = ";"

proc curlyRiWasPragma*(em: var Emitter) =
  ## If the last token is `"}"`, rewrites it to `".}"` and advances `em.col`
  ## by one.
  if em.endsWith("}"):
    em.tokens[^1] = ".}"
    inc em.col