diff options
| Field | Value | Date |
|---|---|---|
| author | Arne Döring <arne.doering@gmx.net> | 2019-02-28 22:57:57 +0100 |
| committer | Andreas Rumpf <rumpf_a@web.de> | 2019-02-28 22:57:57 +0100 |
| commit | 1102b8ac6e643c8f8428dd7db0994d26b0c65ea6 (patch) | |
| tree | b08388f89e7867f03e5d59be00db70a6535752dc /compiler | |
| parent | 728ff1004a60835c18c44b64830ea08dc805485e (diff) | |
| download | Nim-1102b8ac6e643c8f8428dd7db0994d26b0c65ea6.tar.gz | |
StringStream and parseJson, parseCfg, parseSql et al for the vm (#10746)
Diffstat (limited to 'compiler')
| Mode | File | Lines changed |
|---|---|---|
| -rw-r--r-- | compiler/ccgmerge.nim | 16 |
| -rw-r--r-- | compiler/lexer.nim | 123 |
| -rw-r--r-- | compiler/nimlexbase.nim | 32 |
| -rw-r--r-- | compiler/vmgen.nim | 2 |
4 files changed, 73 insertions, 100 deletions
diff --git a/compiler/ccgmerge.nim b/compiler/ccgmerge.nim index ccb5a7635..56b17440e 100644 --- a/compiler/ccgmerge.nim +++ b/compiler/ccgmerge.nim @@ -145,38 +145,34 @@ proc atEndMark(buf: cstring, pos: int): bool = proc readVerbatimSection(L: var TBaseLexer): Rope = var pos = L.bufpos - var buf = L.buf var r = newStringOfCap(30_000) while true: - case buf[pos] + case L.buf[pos] of CR: pos = nimlexbase.handleCR(L, pos) - buf = L.buf r.add('\L') of LF: pos = nimlexbase.handleLF(L, pos) - buf = L.buf r.add('\L') of '\0': doAssert(false, "ccgmerge: expected: " & NimMergeEndMark) break else: - if atEndMark(buf, pos): + if atEndMark(L.buf, pos): inc pos, NimMergeEndMark.len break - r.add(buf[pos]) + r.add(L.buf[pos]) inc pos L.bufpos = pos result = r.rope proc readKey(L: var TBaseLexer, result: var string) = var pos = L.bufpos - var buf = L.buf setLen(result, 0) - while buf[pos] in IdentChars: - result.add(buf[pos]) + while L.buf[pos] in IdentChars: + result.add(L.buf[pos]) inc pos - if buf[pos] != ':': doAssert(false, "ccgmerge: ':' expected") + if L.buf[pos] != ':': doAssert(false, "ccgmerge: ':' expected") L.bufpos = pos + 1 # skip ':' proc newFakeType(id: int): PType = diff --git a/compiler/lexer.nim b/compiler/lexer.nim index 5eaa4c09f..0dd6245b0 100644 --- a/compiler/lexer.nim +++ b/compiler/lexer.nim @@ -318,17 +318,16 @@ template eatChar(L: var TLexer, t: var TToken) = proc getNumber(L: var TLexer, result: var TToken) = proc matchUnderscoreChars(L: var TLexer, tok: var TToken, chars: set[char]): Natural = var pos = L.bufpos # use registers for pos, buf - var buf = L.buf result = 0 while true: - if buf[pos] in chars: - add(tok.literal, buf[pos]) + if L.buf[pos] in chars: + add(tok.literal, L.buf[pos]) inc(pos) inc(result) else: break - if buf[pos] == '_': - if buf[pos+1] notin chars: + if L.buf[pos] == '_': + if L.buf[pos+1] notin chars: lexMessage(L, errGenerated, "only single underscores may occur in a token and token may not " & "end with an underscore: e.g. 
'1__1' and '1_' are invalid") @@ -339,9 +338,8 @@ proc getNumber(L: var TLexer, result: var TToken) = proc matchChars(L: var TLexer, tok: var TToken, chars: set[char]) = var pos = L.bufpos # use registers for pos, buf - var buf = L.buf - while buf[pos] in chars: - add(tok.literal, buf[pos]) + while L.buf[pos] in chars: + add(tok.literal, L.buf[pos]) inc(pos) L.bufpos = pos @@ -800,25 +798,23 @@ type proc getString(L: var TLexer, tok: var TToken, mode: StringMode) = var pos = L.bufpos - var buf = L.buf # put `buf` in a register var line = L.lineNumber # save linenumber for better error message tokenBegin(tok, pos - ord(mode == raw)) inc pos # skip " - if buf[pos] == '\"' and buf[pos+1] == '\"': + if L.buf[pos] == '\"' and L.buf[pos+1] == '\"': tok.tokType = tkTripleStrLit # long string literal: inc(pos, 2) # skip "" # skip leading newline: - if buf[pos] in {' ', '\t'}: + if L.buf[pos] in {' ', '\t'}: var newpos = pos+1 - while buf[newpos] in {' ', '\t'}: inc newpos - if buf[newpos] in {CR, LF}: pos = newpos + while L.buf[newpos] in {' ', '\t'}: inc newpos + if L.buf[newpos] in {CR, LF}: pos = newpos pos = handleCRLF(L, pos) - buf = L.buf while true: - case buf[pos] + case L.buf[pos] of '\"': - if buf[pos+1] == '\"' and buf[pos+2] == '\"' and - buf[pos+3] != '\"': + if L.buf[pos+1] == '\"' and L.buf[pos+2] == '\"' and + L.buf[pos+3] != '\"': tokenEndIgnore(tok, pos+2) L.bufpos = pos + 3 # skip the three """ break @@ -827,7 +823,6 @@ proc getString(L: var TLexer, tok: var TToken, mode: StringMode) = of CR, LF: tokenEndIgnore(tok, pos) pos = handleCRLF(L, pos) - buf = L.buf add(tok.literal, "\n") of nimlexbase.EndOfFile: tokenEndIgnore(tok, pos) @@ -838,16 +833,16 @@ proc getString(L: var TLexer, tok: var TToken, mode: StringMode) = L.bufpos = pos break else: - add(tok.literal, buf[pos]) + add(tok.literal, L.buf[pos]) inc(pos) else: # ordinary string literal if mode != normal: tok.tokType = tkRStrLit else: tok.tokType = tkStrLit while true: - var c = buf[pos] + var c = 
L.buf[pos] if c == '\"': - if mode != normal and buf[pos+1] == '\"': + if mode != normal and L.buf[pos+1] == '\"': inc(pos, 2) add(tok.literal, '"') else: @@ -885,10 +880,9 @@ proc getCharacter(L: var TLexer, tok: var TToken) = proc getSymbol(L: var TLexer, tok: var TToken) = var h: Hash = 0 var pos = L.bufpos - var buf = L.buf tokenBegin(tok, pos) while true: - var c = buf[pos] + var c = L.buf[pos] case c of 'a'..'z', '0'..'9', '\x80'..'\xFF': h = h !& ord(c) @@ -898,7 +892,7 @@ proc getSymbol(L: var TLexer, tok: var TToken) = h = h !& ord(c) inc(pos) of '_': - if buf[pos+1] notin SymChars: + if L.buf[pos+1] notin SymChars: lexMessage(L, errGenerated, "invalid token: trailing underscore") break inc(pos) @@ -923,11 +917,10 @@ proc endOperator(L: var TLexer, tok: var TToken, pos: int, proc getOperator(L: var TLexer, tok: var TToken) = var pos = L.bufpos - var buf = L.buf tokenBegin(tok, pos) var h: Hash = 0 while true: - var c = buf[pos] + var c = L.buf[pos] if c notin OpChars: break h = h !& ord(c) inc(pos) @@ -936,10 +929,10 @@ proc getOperator(L: var TLexer, tok: var TToken) = # advance pos but don't store it in L.bufpos so the next token (which might # be an operator too) gets the preceding spaces: tok.strongSpaceB = 0 - while buf[pos] == ' ': + while L.buf[pos] == ' ': inc pos inc tok.strongSpaceB - if buf[pos] in {CR, LF, nimlexbase.EndOfFile}: + if L.buf[pos] in {CR, LF, nimlexbase.EndOfFile}: tok.strongSpaceB = -1 proc getPrecedence*(tok: TToken, strongSpaces: bool): int = @@ -980,9 +973,8 @@ proc getPrecedence*(tok: TToken, strongSpaces: bool): int = proc newlineFollows*(L: TLexer): bool = var pos = L.bufpos - var buf = L.buf while true: - case buf[pos] + case L.buf[pos] of ' ', '\t': inc(pos) of CR, LF: @@ -990,49 +982,47 @@ proc newlineFollows*(L: TLexer): bool = break of '#': inc(pos) - if buf[pos] == '#': inc(pos) - if buf[pos] != '[': return true + if L.buf[pos] == '#': inc(pos) + if L.buf[pos] != '[': return true else: break proc 
skipMultiLineComment(L: var TLexer; tok: var TToken; start: int; isDoc: bool) = var pos = start - var buf = L.buf var toStrip = 0 tokenBegin(tok, pos) # detect the amount of indentation: if isDoc: toStrip = getColNumber(L, pos) - while buf[pos] == ' ': inc pos - if buf[pos] in {CR, LF}: + while L.buf[pos] == ' ': inc pos + if L.buf[pos] in {CR, LF}: pos = handleCRLF(L, pos) - buf = L.buf toStrip = 0 - while buf[pos] == ' ': + while L.buf[pos] == ' ': inc pos inc toStrip var nesting = 0 while true: - case buf[pos] + case L.buf[pos] of '#': if isDoc: - if buf[pos+1] == '#' and buf[pos+2] == '[': + if L.buf[pos+1] == '#' and L.buf[pos+2] == '[': inc nesting tok.literal.add '#' - elif buf[pos+1] == '[': + elif L.buf[pos+1] == '[': inc nesting inc pos of ']': if isDoc: - if buf[pos+1] == '#' and buf[pos+2] == '#': + if L.buf[pos+1] == '#' and L.buf[pos+2] == '#': if nesting == 0: tokenEndIgnore(tok, pos+2) inc(pos, 3) break dec nesting tok.literal.add ']' - elif buf[pos+1] == '#': + elif L.buf[pos+1] == '#': if nesting == 0: tokenEndIgnore(tok, pos+1) inc(pos, 2) @@ -1042,14 +1032,13 @@ proc skipMultiLineComment(L: var TLexer; tok: var TToken; start: int; of CR, LF: tokenEndIgnore(tok, pos) pos = handleCRLF(L, pos) - buf = L.buf # strip leading whitespace: when defined(nimpretty): tok.literal.add "\L" if isDoc: when not defined(nimpretty): tok.literal.add "\n" inc tok.iNumber var c = toStrip - while buf[pos] == ' ' and c > 0: + while L.buf[pos] == ' ' and c > 0: inc pos dec c of nimlexbase.EndOfFile: @@ -1057,7 +1046,7 @@ proc skipMultiLineComment(L: var TLexer; tok: var TToken; start: int; lexMessagePos(L, errGenerated, pos, "end of multiline comment expected") break else: - if isDoc or defined(nimpretty): tok.literal.add buf[pos] + if isDoc or defined(nimpretty): tok.literal.add L.buf[pos] inc(pos) L.bufpos = pos when defined(nimpretty): @@ -1065,49 +1054,47 @@ proc skipMultiLineComment(L: var TLexer; tok: var TToken; start: int; proc scanComment(L: var TLexer, tok: 
var TToken) = var pos = L.bufpos - var buf = L.buf tok.tokType = tkComment # iNumber contains the number of '\n' in the token tok.iNumber = 0 - assert buf[pos+1] == '#' + assert L.buf[pos+1] == '#' when defined(nimpretty): tok.commentOffsetA = L.offsetBase + pos - 1 - if buf[pos+2] == '[': + if L.buf[pos+2] == '[': skipMultiLineComment(L, tok, pos+3, true) return tokenBegin(tok, pos) inc(pos, 2) var toStrip = 0 - while buf[pos] == ' ': + while L.buf[pos] == ' ': inc pos inc toStrip while true: var lastBackslash = -1 - while buf[pos] notin {CR, LF, nimlexbase.EndOfFile}: - if buf[pos] == '\\': lastBackslash = pos+1 - add(tok.literal, buf[pos]) + while L.buf[pos] notin {CR, LF, nimlexbase.EndOfFile}: + if L.buf[pos] == '\\': lastBackslash = pos+1 + add(tok.literal, L.buf[pos]) inc(pos) tokenEndIgnore(tok, pos) pos = handleCRLF(L, pos) - buf = L.buf var indent = 0 - while buf[pos] == ' ': + while L.buf[pos] == ' ': inc(pos) inc(indent) - if buf[pos] == '#' and buf[pos+1] == '#': + if L.buf[pos] == '#' and L.buf[pos+1] == '#': tok.literal.add "\n" inc(pos, 2) var c = toStrip - while buf[pos] == ' ' and c > 0: + while L.buf[pos] == ' ' and c > 0: inc pos dec c inc tok.iNumber else: - if buf[pos] > ' ': + if L.buf[pos] > ' ': L.indentAhead = indent tokenEndIgnore(tok, pos) break @@ -1117,7 +1104,6 @@ proc scanComment(L: var TLexer, tok: var TToken) = proc skip(L: var TLexer, tok: var TToken) = var pos = L.bufpos - var buf = L.buf tokenBegin(tok, pos) tok.strongSpaceA = 0 when defined(nimpretty): @@ -1127,7 +1113,7 @@ proc skip(L: var TLexer, tok: var TToken) = tok.commentOffsetB = tok.commentOffsetA tok.line = -1 while true: - case buf[pos] + case L.buf[pos] of ' ': inc(pos) inc(tok.strongSpaceA) @@ -1137,13 +1123,12 @@ proc skip(L: var TLexer, tok: var TToken) = of CR, LF: tokenEndPrevious(tok, pos) pos = handleCRLF(L, pos) - buf = L.buf var indent = 0 while true: - if buf[pos] == ' ': + if L.buf[pos] == ' ': inc(pos) inc(indent) - elif buf[pos] == '#' and buf[pos+1] == 
'[': + elif L.buf[pos] == '#' and L.buf[pos+1] == '[': when defined(nimpretty): hasComment = true if tok.line < 0: @@ -1151,32 +1136,30 @@ proc skip(L: var TLexer, tok: var TToken) = commentIndent = indent skipMultiLineComment(L, tok, pos+2, false) pos = L.bufpos - buf = L.buf else: break tok.strongSpaceA = 0 when defined(nimpretty): - if buf[pos] == '#' and tok.line < 0: commentIndent = indent - if buf[pos] > ' ' and (buf[pos] != '#' or buf[pos+1] == '#'): + if L.buf[pos] == '#' and tok.line < 0: commentIndent = indent + if L.buf[pos] > ' ' and (L.buf[pos] != '#' or L.buf[pos+1] == '#'): tok.indent = indent L.currLineIndent = indent break of '#': # do not skip documentation comment: - if buf[pos+1] == '#': break + if L.buf[pos+1] == '#': break when defined(nimpretty): hasComment = true if tok.line < 0: tok.line = L.lineNumber - if buf[pos+1] == '[': + if L.buf[pos+1] == '[': skipMultiLineComment(L, tok, pos+2, false) pos = L.bufpos - buf = L.buf else: tokenBegin(tok, pos) - while buf[pos] notin {CR, LF, nimlexbase.EndOfFile}: - when defined(nimpretty): tok.literal.add buf[pos] + while L.buf[pos] notin {CR, LF, nimlexbase.EndOfFile}: + when defined(nimpretty): tok.literal.add L.buf[pos] inc(pos) tokenEndIgnore(tok, pos+1) when defined(nimpretty): diff --git a/compiler/nimlexbase.nim b/compiler/nimlexbase.nim index 2e7416645..214147a2b 100644 --- a/compiler/nimlexbase.nim +++ b/compiler/nimlexbase.nim @@ -39,8 +39,7 @@ const type TBaseLexer* = object of RootObj bufpos*: int - buf*: cstring - bufLen*: int # length of buffer in characters + buf*: string stream*: PLLStream # we read from this stream lineNumber*: int # the current line number # private data: @@ -65,11 +64,7 @@ proc handleLF*(L: var TBaseLexer, pos: int): int # of the LF. 
# implementation -const - chrSize = sizeof(char) - proc closeBaseLexer(L: var TBaseLexer) = - dealloc(L.buf) llStreamClose(L.stream) proc fillBuffer(L: var TBaseLexer) = @@ -80,14 +75,13 @@ proc fillBuffer(L: var TBaseLexer) = oldBufLen: int # we know here that pos == L.sentinel, but not if this proc # is called the first time by initBaseLexer() - assert(L.sentinel < L.bufLen) - toCopy = L.bufLen - L.sentinel - 1 + assert(L.sentinel < L.buf.len) + toCopy = L.buf.len - L.sentinel - 1 assert(toCopy >= 0) if toCopy > 0: - moveMem(L.buf, addr(L.buf[L.sentinel + 1]), toCopy * chrSize) + moveMem(addr L.buf[0], addr L.buf[L.sentinel + 1], toCopy) # "moveMem" handles overlapping regions - charsRead = llStreamRead(L.stream, addr(L.buf[toCopy]), - (L.sentinel + 1) * chrSize) div chrSize + charsRead = llStreamRead(L.stream, addr L.buf[toCopy], L.sentinel + 1) s = toCopy + charsRead if charsRead < L.sentinel + 1: L.buf[s] = EndOfFile # set end marker @@ -96,7 +90,7 @@ proc fillBuffer(L: var TBaseLexer) = # compute sentinel: dec(s) # BUGFIX (valgrind) while true: - assert(s < L.bufLen) + assert(s < L.buf.len) while (s >= 0) and not (L.buf[s] in NewLines): dec(s) if s >= 0: # we found an appropriate character for a sentinel: @@ -105,17 +99,16 @@ proc fillBuffer(L: var TBaseLexer) = else: # rather than to give up here because the line is too long, # double the buffer's size and try again: - oldBufLen = L.bufLen - L.bufLen = L.bufLen * 2 - L.buf = cast[cstring](realloc(L.buf, L.bufLen * chrSize)) - assert(L.bufLen - oldBufLen == oldBufLen) + oldBufLen = L.buf.len + L.buf.setLen(L.buf.len * 2) + assert(L.buf.len - oldBufLen == oldBufLen) charsRead = llStreamRead(L.stream, addr(L.buf[oldBufLen]), - oldBufLen * chrSize) div chrSize + oldBufLen) if charsRead < oldBufLen: L.buf[oldBufLen + charsRead] = EndOfFile L.sentinel = oldBufLen + charsRead break - s = L.bufLen - 1 + s = L.buf.len - 1 proc fillBaseLexer(L: var TBaseLexer, pos: int): int = assert(pos <= L.sentinel) @@ -149,8 
+142,7 @@ proc openBaseLexer(L: var TBaseLexer, inputstream: PLLStream, bufLen = 8192) = assert(bufLen > 0) L.bufpos = 0 L.offsetBase = 0 - L.bufLen = bufLen - L.buf = cast[cstring](alloc(bufLen * chrSize)) + L.buf = newString(bufLen) L.sentinel = bufLen - 1 L.lineStart = 0 L.lineNumber = 1 # lines start at 1 diff --git a/compiler/vmgen.nim b/compiler/vmgen.nim index ab2ac8707..092c25a46 100644 --- a/compiler/vmgen.nim +++ b/compiler/vmgen.nim @@ -2039,6 +2039,8 @@ proc gen(c: PCtx; n: PNode; dest: var TDest; flags: TGenFlags = {}) = genConv(c, n, n.sons[1], dest) of nkObjDownConv: genConv(c, n, n.sons[0], dest) + of nkObjUpConv: + genConv(c, n, n.sons[0], dest) of nkVarSection, nkLetSection: unused(c, n, dest) genVarSection(c, n) |