Diffstat (limited to 'compiler/lexer.nim')
-rw-r--r-- | compiler/lexer.nim | 86 |
1 files changed, 51 insertions, 35 deletions
diff --git a/compiler/lexer.nim b/compiler/lexer.nim
index 45d090b16..68b0164d4 100644
--- a/compiler/lexer.nim
+++ b/compiler/lexer.nim
@@ -126,6 +126,9 @@ type
     literal*: string          # the parsed (string) literal; and
                               # documentation comments are here too
     line*, col*: int
+    when defined(nimpretty):
+      offsetA*, offsetB*: int # used for pretty printing so that literals
+                              # like 0b01 or r"\L" are unaffected
 
   TErrorHandler* = proc (info: TLineInfo; msg: TMsgKind; arg: string)
   TLexer* = object of TBaseLexer
@@ -144,7 +147,10 @@ type
 var gLinesCompiled*: int  # all lines that have been compiled
 
 proc getLineInfo*(L: TLexer, tok: TToken): TLineInfo {.inline.} =
-  newLineInfo(L.fileIdx, tok.line, tok.col)
+  result = newLineInfo(L.fileIdx, tok.line, tok.col)
+  when defined(nimpretty):
+    result.offsetA = tok.offsetA
+    result.offsetB = tok.offsetB
 
 proc isKeyword*(kind: TTokType): bool =
   result = (kind >= tokKeywordLow) and (kind <= tokKeywordHigh)
@@ -245,11 +251,13 @@ proc lexMessagePos(L: var TLexer, msg: TMsgKind, pos: int, arg = "") =
 proc matchTwoChars(L: TLexer, first: char, second: set[char]): bool =
   result = (L.buf[L.bufpos] == first) and (L.buf[L.bufpos + 1] in second)
 
-template tokenBegin(pos) {.dirty.} =
+template tokenBegin(tok, pos) {.dirty.} =
   when defined(nimsuggest):
     var colA = getColNumber(L, pos)
+  when defined(nimpretty):
+    tok.offsetA = L.offsetBase + pos
 
-template tokenEnd(pos) {.dirty.} =
+template tokenEnd(tok, pos) {.dirty.} =
   when defined(nimsuggest):
     let colB = getColNumber(L, pos)+1
     if L.fileIdx == gTrackPos.fileIndex and gTrackPos.col in colA..colB and
@@ -257,8 +265,10 @@ template tokenEnd(pos) {.dirty.} =
       L.cursor = CursorPosition.InToken
       gTrackPos.col = colA.int16
     colA = 0
+  when defined(nimpretty):
+    tok.offsetB = L.offsetBase + pos
 
-template tokenEndIgnore(pos) =
+template tokenEndIgnore(tok, pos) =
   when defined(nimsuggest):
     let colB = getColNumber(L, pos)
     if L.fileIdx == gTrackPos.fileIndex and gTrackPos.col in colA..colB and
@@ -266,8 +276,10 @@ template tokenEndIgnore(pos) =
       gTrackPos.fileIndex = trackPosInvalidFileIdx
      gTrackPos.line = -1
     colA = 0
+  when defined(nimpretty):
+    tok.offsetB = L.offsetBase + pos
 
-template tokenEndPrevious(pos) =
+template tokenEndPrevious(tok, pos) =
   when defined(nimsuggest):
     # when we detect the cursor in whitespace, we attach the track position
     # to the token that came before that, but only if we haven't detected
@@ -279,6 +291,8 @@ template tokenEndPrevious(pos) =
       gTrackPos = L.previousToken
       gTrackPosAttached = true
     colA = 0
+  when defined(nimpretty):
+    tok.offsetB = L.offsetBase + pos
 
 {.push overflowChecks: off.}
 # We need to parse the largest uint literal without overflow checks
@@ -363,7 +377,7 @@ proc getNumber(L: var TLexer, result: var TToken) =
   result.literal = ""
   result.base = base10
   startpos = L.bufpos
-  tokenBegin(startPos)
+  tokenBegin(result, startPos)
 
   # First stage: find out base, make verifications, build token literal string
   if L.buf[L.bufpos] == '0' and L.buf[L.bufpos + 1] in baseCodeChars + {'O'}:
@@ -573,7 +587,7 @@ proc getNumber(L: var TLexer, result: var TToken) =
     lexMessageLitNum(L, errInvalidNumber, startpos)
   except OverflowError, RangeError:
     lexMessageLitNum(L, errNumberOutOfRange, startpos)
-  tokenEnd(postPos-1)
+  tokenEnd(result, postPos-1)
   L.bufpos = postPos
 
 proc handleHexChar(L: var TLexer, xi: var int) =
@@ -691,10 +705,11 @@ proc handleCRLF(L: var TLexer, pos: int): int =
   else: result = pos
 
 proc getString(L: var TLexer, tok: var TToken, rawMode: bool) =
-  var pos = L.bufpos + 1          # skip "
+  var pos = L.bufpos
   var buf = L.buf                 # put `buf` in a register
   var line = L.lineNumber         # save linenumber for better error message
-  tokenBegin(pos)
+  tokenBegin(tok, pos)
+  inc pos # skip "
   if buf[pos] == '\"' and buf[pos+1] == '\"':
     tok.tokType = tkTripleStrLit # long string literal:
     inc(pos, 2)               # skip ""
@@ -710,18 +725,18 @@ proc getString(L: var TLexer, tok: var TToken, rawMode: bool) =
       of '\"':
         if buf[pos+1] == '\"' and buf[pos+2] == '\"' and
             buf[pos+3] != '\"':
-          tokenEndIgnore(pos+2)
+          tokenEndIgnore(tok, pos+2)
           L.bufpos = pos + 3 # skip the three """
           break
         add(tok.literal, '\"')
         inc(pos)
       of CR, LF:
-        tokenEndIgnore(pos)
+        tokenEndIgnore(tok, pos)
         pos = handleCRLF(L, pos)
         buf = L.buf
         add(tok.literal, tnl)
       of nimlexbase.EndOfFile:
-        tokenEndIgnore(pos)
+        tokenEndIgnore(tok, pos)
         var line2 = L.lineNumber
         L.lineNumber = line
         lexMessagePos(L, errClosingTripleQuoteExpected, L.lineStart)
@@ -742,11 +757,11 @@ proc getString(L: var TLexer, tok: var TToken, rawMode: bool) =
           inc(pos, 2)
           add(tok.literal, '"')
         else:
-          tokenEndIgnore(pos)
+          tokenEndIgnore(tok, pos)
          inc(pos) # skip '"'
           break
       elif c in {CR, LF, nimlexbase.EndOfFile}:
-        tokenEndIgnore(pos)
+        tokenEndIgnore(tok, pos)
         lexMessage(L, errClosingQuoteExpected)
         break
       elif (c == '\\') and not rawMode:
@@ -759,7 +774,7 @@ proc getString(L: var TLexer, tok: var TToken, rawMode: bool) =
   L.bufpos = pos
 
 proc getCharacter(L: var TLexer, tok: var TToken) =
-  tokenBegin(L.bufpos)
+  tokenBegin(tok, L.bufpos)
   inc(L.bufpos)               # skip '
   var c = L.buf[L.bufpos]
   case c
@@ -769,14 +784,14 @@ proc getCharacter(L: var TLexer, tok: var TToken) =
     tok.literal = $c
     inc(L.bufpos)
   if L.buf[L.bufpos] != '\'': lexMessage(L, errMissingFinalQuote)
-  tokenEndIgnore(L.bufpos)
+  tokenEndIgnore(tok, L.bufpos)
   inc(L.bufpos)               # skip '
 
 proc getSymbol(L: var TLexer, tok: var TToken) =
   var h: Hash = 0
   var pos = L.bufpos
   var buf = L.buf
-  tokenBegin(pos)
+  tokenBegin(tok, pos)
   while true:
     var c = buf[pos]
     case c
@@ -793,7 +808,7 @@ proc getSymbol(L: var TLexer, tok: var TToken) =
         break
       inc(pos)
     else: break
-  tokenEnd(pos-1)
+  tokenEnd(tok, pos-1)
   h = !$h
   tok.ident = L.cache.getIdent(addr(L.buf[L.bufpos]), pos - L.bufpos, h)
   L.bufpos = pos
@@ -814,7 +829,7 @@ proc endOperator(L: var TLexer, tok: var TToken, pos: int,
 proc getOperator(L: var TLexer, tok: var TToken) =
   var pos = L.bufpos
   var buf = L.buf
-  tokenBegin(pos)
+  tokenBegin(tok, pos)
   var h: Hash = 0
   while true:
     var c = buf[pos]
@@ -822,7 +837,7 @@ proc getOperator(L: var TLexer, tok: var TToken) =
     h = h !& ord(c)
     inc(pos)
   endOperator(L, tok, pos, h)
-  tokenEnd(pos-1)
+  tokenEnd(tok, pos-1)
   # advance pos but don't store it in L.bufpos so the next token (which might
   # be an operator too) gets the preceding spaces:
   tok.strongSpaceB = 0
@@ -837,7 +852,7 @@ proc skipMultiLineComment(L: var TLexer; tok: var TToken; start: int;
   var pos = start
   var buf = L.buf
   var toStrip = 0
-  tokenBegin(pos)
+  tokenBegin(tok, pos)
   # detect the amount of indentation:
   if isDoc:
     toStrip = getColNumber(L, pos)
@@ -864,36 +879,37 @@ proc skipMultiLineComment(L: var TLexer; tok: var TToken; start: int;
       if isDoc:
         if buf[pos+1] == '#' and buf[pos+2] == '#':
           if nesting == 0:
-            tokenEndIgnore(pos+2)
+            tokenEndIgnore(tok, pos+2)
             inc(pos, 3)
             break
           dec nesting
         tok.literal.add ']'
       elif buf[pos+1] == '#':
         if nesting == 0:
-          tokenEndIgnore(pos+1)
+          tokenEndIgnore(tok, pos+1)
          inc(pos, 2)
           break
         dec nesting
       inc pos
     of CR, LF:
-      tokenEndIgnore(pos)
+      tokenEndIgnore(tok, pos)
       pos = handleCRLF(L, pos)
       buf = L.buf
       # strip leading whitespace:
+      when defined(nimpretty): tok.literal.add "\L"
       if isDoc:
-        tok.literal.add "\n"
+        when not defined(nimpretty): tok.literal.add "\n"
        inc tok.iNumber
         var c = toStrip
         while buf[pos] == ' ' and c > 0:
           inc pos
           dec c
     of nimlexbase.EndOfFile:
-      tokenEndIgnore(pos)
+      tokenEndIgnore(tok, pos)
      lexMessagePos(L, errGenerated, pos, "end of multiline comment expected")
       break
     else:
-      if isDoc: tok.literal.add buf[pos]
+      if isDoc or defined(nimpretty): tok.literal.add buf[pos]
       inc(pos)
   L.bufpos = pos
 
@@ -907,7 +923,7 @@ proc scanComment(L: var TLexer, tok: var TToken) =
   if buf[pos+2] == '[':
     skipMultiLineComment(L, tok, pos+3, true)
     return
-  tokenBegin(pos)
+  tokenBegin(tok, pos)
   inc(pos, 2)
 
   var toStrip = 0
@@ -921,7 +937,7 @@ proc scanComment(L: var TLexer, tok: var TToken) =
       if buf[pos] == '\\': lastBackslash = pos+1
       add(tok.literal, buf[pos])
       inc(pos)
-    tokenEndIgnore(pos)
+    tokenEndIgnore(tok, pos)
     pos = handleCRLF(L, pos)
     buf = L.buf
     var indent = 0
@@ -940,14 +956,14 @@ proc scanComment(L: var TLexer, tok: var TToken) =
     else:
       if buf[pos] > ' ':
         L.indentAhead = indent
-      tokenEndIgnore(pos)
+      tokenEndIgnore(tok, pos)
       break
   L.bufpos = pos
 
 proc skip(L: var TLexer, tok: var TToken) =
   var pos = L.bufpos
   var buf = L.buf
-  tokenBegin(pos)
+  tokenBegin(tok, pos)
   tok.strongSpaceA = 0
   while true:
     case buf[pos]
@@ -958,7 +974,7 @@ proc skip(L: var TLexer, tok: var TToken) =
       if not L.allowTabs: lexMessagePos(L, errTabulatorsAreNotAllowed, pos)
       inc(pos)
     of CR, LF:
-      tokenEndPrevious(pos)
+      tokenEndPrevious(tok, pos)
       pos = handleCRLF(L, pos)
       buf = L.buf
       var indent = 0
@@ -985,12 +1001,12 @@ proc skip(L: var TLexer, tok: var TToken) =
         pos = L.bufpos
         buf = L.buf
       else:
-        tokenBegin(pos)
+        tokenBegin(tok, pos)
         while buf[pos] notin {CR, LF, nimlexbase.EndOfFile}: inc(pos)
-        tokenEndIgnore(pos+1)
+        tokenEndIgnore(tok, pos+1)
     else:
       break                   # EndOfFile also leaves the loop
-  tokenEndPrevious(pos-1)
+  tokenEndPrevious(tok, pos-1)
   L.bufpos = pos
 
 proc rawGetTok*(L: var TLexer, tok: var TToken) =