StringStream and parseJson, parseCfg, parseSql et al for the vm (#10746)

author: Arne Döring <arne.doering@gmx.net> 2019-02-28 22:57:57 +0100
committer: Andreas Rumpf <rumpf_a@web.de> 2019-02-28 22:57:57 +0100
commit: 1102b8ac6e643c8f8428dd7db0994d26b0c65ea6 (patch)
tree: b08388f89e7867f03e5d59be00db70a6535752dc /compiler
parent: 728ff1004a60835c18c44b64830ea08dc805485e (diff)
download: Nim-1102b8ac6e643c8f8428dd7db0994d26b0c65ea6.tar.gz
4 files changed, 73 insertions, 100 deletions
diff --git a/compiler/ccgmerge.nim b/compiler/ccgmerge.nim
index ccb5a7635..56b17440e 100644
--- a/compiler/ccgmerge.nim
+++ b/compiler/ccgmerge.nim
@@ -145,38 +145,34 @@ proc atEndMark(buf: cstring, pos: int): bool =
 
 proc readVerbatimSection(L: var TBaseLexer): Rope =
   var pos = L.bufpos
-  var buf = L.buf
   var r = newStringOfCap(30_000)
   while true:
-    case buf[pos]
+    case L.buf[pos]
     of CR:
       pos = nimlexbase.handleCR(L, pos)
-      buf = L.buf
       r.add('\L')
     of LF:
       pos = nimlexbase.handleLF(L, pos)
-      buf = L.buf
       r.add('\L')
     of '\0':
       doAssert(false, "ccgmerge: expected: " & NimMergeEndMark)
       break
     else:
-      if atEndMark(buf, pos):
+      if atEndMark(L.buf, pos):
         inc pos, NimMergeEndMark.len
         break
-      r.add(buf[pos])
+      r.add(L.buf[pos])
       inc pos
   L.bufpos = pos
   result = r.rope
 
 proc readKey(L: var TBaseLexer, result: var string) =
   var pos = L.bufpos
-  var buf = L.buf
   setLen(result, 0)
-  while buf[pos] in IdentChars:
-    result.add(buf[pos])
+  while L.buf[pos] in IdentChars:
+    result.add(L.buf[pos])
     inc pos
-  if buf[pos] != ':': doAssert(false, "ccgmerge: ':' expected")
+  if L.buf[pos] != ':': doAssert(false, "ccgmerge: ':' expected")
   L.bufpos = pos + 1 # skip ':'
 
 proc newFakeType(id: int): PType =
diff --git a/compiler/lexer.nim b/compiler/lexer.nim
index 5eaa4c09f..0dd6245b0 100644
--- a/compiler/lexer.nim
+++ b/compiler/lexer.nim
@@ -318,17 +318,16 @@ template eatChar(L: var TLexer, t: var TToken) =
 proc getNumber(L: var TLexer, result: var TToken) =
   proc matchUnderscoreChars(L: var TLexer, tok: var TToken, chars: set[char]): Natural =
     var pos = L.bufpos              # use registers for pos, buf
-    var buf = L.buf
     result = 0
     while true:
-      if buf[pos] in chars:
-        add(tok.literal, buf[pos])
+      if L.buf[pos] in chars:
+        add(tok.literal, L.buf[pos])
         inc(pos)
         inc(result)
       else:
         break
-      if buf[pos] == '_':
-        if buf[pos+1] notin chars:
+      if L.buf[pos] == '_':
+        if L.buf[pos+1] notin chars:
           lexMessage(L, errGenerated,
             "only single underscores may occur in a token and token may not " &
             "end with an underscore: e.g. '1__1' and '1_' are invalid")
@@ -339,9 +338,8 @@ proc getNumber(L: var TLexer, result: var TToken) =
 
   proc matchChars(L: var TLexer, tok: var TToken, chars: set[char]) =
     var pos = L.bufpos              # use registers for pos, buf
-    var buf = L.buf
-    while buf[pos] in chars:
-      add(tok.literal, buf[pos])
+    while L.buf[pos] in chars:
+      add(tok.literal, L.buf[pos])
       inc(pos)
     L.bufpos = pos
 
@@ -800,25 +798,23 @@ type
 
 proc getString(L: var TLexer, tok: var TToken, mode: StringMode) =
   var pos = L.bufpos
-  var buf = L.buf                 # put `buf` in a register
   var line = L.lineNumber         # save linenumber for better error message
   tokenBegin(tok, pos - ord(mode == raw))
   inc pos # skip "
-  if buf[pos] == '\"' and buf[pos+1] == '\"':
+  if L.buf[pos] == '\"' and L.buf[pos+1] == '\"':
     tok.tokType = tkTripleStrLit # long string literal:
     inc(pos, 2)               # skip ""
     # skip leading newline:
-    if buf[pos] in {' ', '\t'}:
+    if L.buf[pos] in {' ', '\t'}:
       var newpos = pos+1
-      while buf[newpos] in {' ', '\t'}: inc newpos
-      if buf[newpos] in {CR, LF}: pos = newpos
+      while L.buf[newpos] in {' ', '\t'}: inc newpos
+      if L.buf[newpos] in {CR, LF}: pos = newpos
     pos = handleCRLF(L, pos)
-    buf = L.buf
     while true:
-      case buf[pos]
+      case L.buf[pos]
       of '\"':
-        if buf[pos+1] == '\"' and buf[pos+2] == '\"' and
-            buf[pos+3] != '\"':
+        if L.buf[pos+1] == '\"' and L.buf[pos+2] == '\"' and
+            L.buf[pos+3] != '\"':
           tokenEndIgnore(tok, pos+2)
           L.bufpos = pos + 3 # skip the three """
           break
@@ -827,7 +823,6 @@ proc getString(L: var TLexer, tok: var TToken, mode: StringMode) =
       of CR, LF:
         tokenEndIgnore(tok, pos)
         pos = handleCRLF(L, pos)
-        buf = L.buf
         add(tok.literal, "\n")
       of nimlexbase.EndOfFile:
         tokenEndIgnore(tok, pos)
@@ -838,16 +833,16 @@ proc getString(L: var TLexer, tok: var TToken, mode: StringMode) =
         L.bufpos = pos
         break
       else:
-        add(tok.literal, buf[pos])
+        add(tok.literal, L.buf[pos])
         inc(pos)
   else:
     # ordinary string literal
     if mode != normal: tok.tokType = tkRStrLit
     else: tok.tokType = tkStrLit
     while true:
-      var c = buf[pos]
+      var c = L.buf[pos]
       if c == '\"':
-        if mode != normal and buf[pos+1] == '\"':
+        if mode != normal and L.buf[pos+1] == '\"':
           inc(pos, 2)
           add(tok.literal, '"')
         else:
@@ -885,10 +880,9 @@ proc getCharacter(L: var TLexer, tok: var TToken) =
 proc getSymbol(L: var TLexer, tok: var TToken) =
   var h: Hash = 0
   var pos = L.bufpos
-  var buf = L.buf
   tokenBegin(tok, pos)
   while true:
-    var c = buf[pos]
+    var c = L.buf[pos]
     case c
     of 'a'..'z', '0'..'9', '\x80'..'\xFF':
       h = h !& ord(c)
@@ -898,7 +892,7 @@ proc getSymbol(L: var TLexer, tok: var TToken) =
       h = h !& ord(c)
       inc(pos)
     of '_':
-      if buf[pos+1] notin SymChars:
+      if L.buf[pos+1] notin SymChars:
         lexMessage(L, errGenerated, "invalid token: trailing underscore")
         break
       inc(pos)
@@ -923,11 +917,10 @@ proc endOperator(L: var TLexer, tok: var TToken, pos: int,
 
 proc getOperator(L: var TLexer, tok: var TToken) =
   var pos = L.bufpos
-  var buf = L.buf
   tokenBegin(tok, pos)
   var h: Hash = 0
   while true:
-    var c = buf[pos]
+    var c = L.buf[pos]
     if c notin OpChars: break
     h = h !& ord(c)
     inc(pos)
@@ -936,10 +929,10 @@ proc getOperator(L: var TLexer, tok: var TToken) =
   # advance pos but don't store it in L.bufpos so the next token (which might
   # be an operator too) gets the preceding spaces:
   tok.strongSpaceB = 0
-  while buf[pos] == ' ':
+  while L.buf[pos] == ' ':
     inc pos
     inc tok.strongSpaceB
-  if buf[pos] in {CR, LF, nimlexbase.EndOfFile}:
+  if L.buf[pos] in {CR, LF, nimlexbase.EndOfFile}:
     tok.strongSpaceB = -1
 
 proc getPrecedence*(tok: TToken, strongSpaces: bool): int =
@@ -980,9 +973,8 @@ proc getPrecedence*(tok: TToken, strongSpaces: bool): int =
 
 proc newlineFollows*(L: TLexer): bool =
   var pos = L.bufpos
-  var buf = L.buf
   while true:
-    case buf[pos]
+    case L.buf[pos]
     of ' ', '\t':
       inc(pos)
     of CR, LF:
@@ -990,49 +982,47 @@ proc newlineFollows*(L: TLexer): bool =
       break
     of '#':
       inc(pos)
-      if buf[pos] == '#': inc(pos)
-      if buf[pos] != '[': return true
+      if L.buf[pos] == '#': inc(pos)
+      if L.buf[pos] != '[': return true
     else:
       break
 
 proc skipMultiLineComment(L: var TLexer; tok: var TToken; start: int;
                           isDoc: bool) =
   var pos = start
-  var buf = L.buf
   var toStrip = 0
   tokenBegin(tok, pos)
   # detect the amount of indentation:
   if isDoc:
     toStrip = getColNumber(L, pos)
-    while buf[pos] == ' ': inc pos
-    if buf[pos] in {CR, LF}:
+    while L.buf[pos] == ' ': inc pos
+    if L.buf[pos] in {CR, LF}:
       pos = handleCRLF(L, pos)
-      buf = L.buf
       toStrip = 0
-      while buf[pos] == ' ':
+      while L.buf[pos] == ' ':
         inc pos
         inc toStrip
   var nesting = 0
   while true:
-    case buf[pos]
+    case L.buf[pos]
     of '#':
       if isDoc:
-        if buf[pos+1] == '#' and buf[pos+2] == '[':
+        if L.buf[pos+1] == '#' and L.buf[pos+2] == '[':
           inc nesting
         tok.literal.add '#'
-      elif buf[pos+1] == '[':
+      elif L.buf[pos+1] == '[':
         inc nesting
       inc pos
     of ']':
       if isDoc:
-        if buf[pos+1] == '#' and buf[pos+2] == '#':
+        if L.buf[pos+1] == '#' and L.buf[pos+2] == '#':
           if nesting == 0:
             tokenEndIgnore(tok, pos+2)
             inc(pos, 3)
             break
           dec nesting
         tok.literal.add ']'
-      elif buf[pos+1] == '#':
+      elif L.buf[pos+1] == '#':
         if nesting == 0:
           tokenEndIgnore(tok, pos+1)
           inc(pos, 2)
@@ -1042,14 +1032,13 @@ proc skipMultiLineComment(L: var TLexer; tok: var TToken; start: int;
     of CR, LF:
       tokenEndIgnore(tok, pos)
       pos = handleCRLF(L, pos)
-      buf = L.buf
       # strip leading whitespace:
       when defined(nimpretty): tok.literal.add "\L"
       if isDoc:
         when not defined(nimpretty): tok.literal.add "\n"
         inc tok.iNumber
         var c = toStrip
-        while buf[pos] == ' ' and c > 0:
+        while L.buf[pos] == ' ' and c > 0:
           inc pos
           dec c
     of nimlexbase.EndOfFile:
@@ -1057,7 +1046,7 @@ proc skipMultiLineComment(L: var TLexer; tok: var TToken; start: int;
       lexMessagePos(L, errGenerated, pos, "end of multiline comment expected")
       break
     else:
-      if isDoc or defined(nimpretty): tok.literal.add buf[pos]
+      if isDoc or defined(nimpretty): tok.literal.add L.buf[pos]
       inc(pos)
   L.bufpos = pos
   when defined(nimpretty):
@@ -1065,49 +1054,47 @@ proc skipMultiLineComment(L: var TLexer; tok: var TToken; start: int;
 
 proc scanComment(L: var TLexer, tok: var TToken) =
   var pos = L.bufpos
-  var buf = L.buf
   tok.tokType = tkComment
   # iNumber contains the number of '\n' in the token
   tok.iNumber = 0
-  assert buf[pos+1] == '#'
+  assert L.buf[pos+1] == '#'
   when defined(nimpretty):
     tok.commentOffsetA = L.offsetBase + pos - 1
 
-  if buf[pos+2] == '[':
+  if L.buf[pos+2] == '[':
     skipMultiLineComment(L, tok, pos+3, true)
     return
   tokenBegin(tok, pos)
   inc(pos, 2)
 
   var toStrip = 0
-  while buf[pos] == ' ':
+  while L.buf[pos] == ' ':
     inc pos
     inc toStrip
 
   while true:
     var lastBackslash = -1
-    while buf[pos] notin {CR, LF, nimlexbase.EndOfFile}:
-      if buf[pos] == '\\': lastBackslash = pos+1
-      add(tok.literal, buf[pos])
+    while L.buf[pos] notin {CR, LF, nimlexbase.EndOfFile}:
+      if L.buf[pos] == '\\': lastBackslash = pos+1
+      add(tok.literal, L.buf[pos])
       inc(pos)
     tokenEndIgnore(tok, pos)
     pos = handleCRLF(L, pos)
-    buf = L.buf
     var indent = 0
-    while buf[pos] == ' ':
+    while L.buf[pos] == ' ':
       inc(pos)
       inc(indent)
 
-    if buf[pos] == '#' and buf[pos+1] == '#':
+    if L.buf[pos] == '#' and L.buf[pos+1] == '#':
       tok.literal.add "\n"
       inc(pos, 2)
       var c = toStrip
-      while buf[pos] == ' ' and c > 0:
+      while L.buf[pos] == ' ' and c > 0:
         inc pos
         dec c
       inc tok.iNumber
     else:
-      if buf[pos] > ' ':
+      if L.buf[pos] > ' ':
         L.indentAhead = indent
       tokenEndIgnore(tok, pos)
       break
@@ -1117,7 +1104,6 @@ proc scanComment(L: var TLexer, tok: var TToken) =
 
 proc skip(L: var TLexer, tok: var TToken) =
   var pos = L.bufpos
-  var buf = L.buf
   tokenBegin(tok, pos)
   tok.strongSpaceA = 0
   when defined(nimpretty):
@@ -1127,7 +1113,7 @@ proc skip(L: var TLexer, tok: var TToken) =
     tok.commentOffsetB = tok.commentOffsetA
     tok.line = -1
   while true:
-    case buf[pos]
+    case L.buf[pos]
     of ' ':
       inc(pos)
       inc(tok.strongSpaceA)
@@ -1137,13 +1123,12 @@ proc skip(L: var TLexer, tok: var TToken) =
     of CR, LF:
       tokenEndPrevious(tok, pos)
       pos = handleCRLF(L, pos)
-      buf = L.buf
       var indent = 0
       while true:
-        if buf[pos] == ' ':
+        if L.buf[pos] == ' ':
           inc(pos)
           inc(indent)
-        elif buf[pos] == '#' and buf[pos+1] == '[':
+        elif L.buf[pos] == '#' and L.buf[pos+1] == '[':
           when defined(nimpretty):
             hasComment = true
             if tok.line < 0:
@@ -1151,32 +1136,30 @@ proc skip(L: var TLexer, tok: var TToken) =
               commentIndent = indent
           skipMultiLineComment(L, tok, pos+2, false)
           pos = L.bufpos
-          buf = L.buf
         else:
           break
       tok.strongSpaceA = 0
       when defined(nimpretty):
-        if buf[pos] == '#' and tok.line < 0: commentIndent = indent
-      if buf[pos] > ' ' and (buf[pos] != '#' or buf[pos+1] == '#'):
+        if L.buf[pos] == '#' and tok.line < 0: commentIndent = indent
+      if L.buf[pos] > ' ' and (L.buf[pos] != '#' or L.buf[pos+1] == '#'):
         tok.indent = indent
         L.currLineIndent = indent
         break
     of '#':
       # do not skip documentation comment:
-      if buf[pos+1] == '#': break
+      if L.buf[pos+1] == '#': break
       when defined(nimpretty):
         hasComment = true
         if tok.line < 0:
           tok.line = L.lineNumber
 
-      if buf[pos+1] == '[':
+      if L.buf[pos+1] == '[':
         skipMultiLineComment(L, tok, pos+2, false)
         pos = L.bufpos
-        buf = L.buf
       else:
         tokenBegin(tok, pos)
-        while buf[pos] notin {CR, LF, nimlexbase.EndOfFile}:
-          when defined(nimpretty): tok.literal.add buf[pos]
+        while L.buf[pos] notin {CR, LF, nimlexbase.EndOfFile}:
+          when defined(nimpretty): tok.literal.add L.buf[pos]
           inc(pos)
         tokenEndIgnore(tok, pos+1)
         when defined(nimpretty):
diff --git a/compiler/nimlexbase.nim b/compiler/nimlexbase.nim
index 2e7416645..214147a2b 100644
--- a/compiler/nimlexbase.nim
+++ b/compiler/nimlexbase.nim
@@ -39,8 +39,7 @@ const
 type
   TBaseLexer* = object of RootObj
     bufpos*: int
-    buf*: cstring
-    bufLen*: int              # length of buffer in characters
+    buf*: string
     stream*: PLLStream        # we read from this stream
     lineNumber*: int          # the current line number
                               # private data:
@@ -65,11 +64,7 @@ proc handleLF*(L: var TBaseLexer, pos: int): int
   # of the LF.
 # implementation
 
-const
-  chrSize = sizeof(char)
-
 proc closeBaseLexer(L: var TBaseLexer) =
-  dealloc(L.buf)
   llStreamClose(L.stream)
 
 proc fillBuffer(L: var TBaseLexer) =
@@ -80,14 +75,13 @@ proc fillBuffer(L: var TBaseLexer) =
     oldBufLen: int
   # we know here that pos == L.sentinel, but not if this proc
   # is called the first time by initBaseLexer()
-  assert(L.sentinel < L.bufLen)
-  toCopy = L.bufLen - L.sentinel - 1
+  assert(L.sentinel < L.buf.len)
+  toCopy = L.buf.len - L.sentinel - 1
   assert(toCopy >= 0)
   if toCopy > 0:
-    moveMem(L.buf, addr(L.buf[L.sentinel + 1]), toCopy * chrSize)
+    moveMem(addr L.buf[0], addr L.buf[L.sentinel + 1], toCopy)
     # "moveMem" handles overlapping regions
-  charsRead = llStreamRead(L.stream, addr(L.buf[toCopy]),
-                           (L.sentinel + 1) * chrSize) div chrSize
+  charsRead = llStreamRead(L.stream, addr L.buf[toCopy], L.sentinel + 1)
   s = toCopy + charsRead
   if charsRead < L.sentinel + 1:
     L.buf[s] = EndOfFile      # set end marker
@@ -96,7 +90,7 @@ proc fillBuffer(L: var TBaseLexer) =
     # compute sentinel:
     dec(s)                    # BUGFIX (valgrind)
     while true:
-      assert(s < L.bufLen)
+      assert(s < L.buf.len)
       while (s >= 0) and not (L.buf[s] in NewLines): dec(s)
       if s >= 0:
         # we found an appropriate character for a sentinel:
@@ -105,17 +99,16 @@ proc fillBuffer(L: var TBaseLexer) =
       else:
         # rather than to give up here because the line is too long,
         # double the buffer's size and try again:
-        oldBufLen = L.bufLen
-        L.bufLen = L.bufLen * 2
-        L.buf = cast[cstring](realloc(L.buf, L.bufLen * chrSize))
-        assert(L.bufLen - oldBufLen == oldBufLen)
+        oldBufLen = L.buf.len
+        L.buf.setLen(L.buf.len * 2)
+        assert(L.buf.len - oldBufLen == oldBufLen)
         charsRead = llStreamRead(L.stream, addr(L.buf[oldBufLen]),
-                                 oldBufLen * chrSize) div chrSize
+                                 oldBufLen)
         if charsRead < oldBufLen:
           L.buf[oldBufLen + charsRead] = EndOfFile
           L.sentinel = oldBufLen + charsRead
           break
-        s = L.bufLen - 1
+        s = L.buf.len - 1
 
 proc fillBaseLexer(L: var TBaseLexer, pos: int): int =
   assert(pos <= L.sentinel)
@@ -149,8 +142,7 @@ proc openBaseLexer(L: var TBaseLexer, inputstream: PLLStream, bufLen = 8192) =
   assert(bufLen > 0)
   L.bufpos = 0
   L.offsetBase = 0
-  L.bufLen = bufLen
-  L.buf = cast[cstring](alloc(bufLen * chrSize))
+  L.buf = newString(bufLen)
   L.sentinel = bufLen - 1
   L.lineStart = 0
   L.lineNumber = 1            # lines start at 1
diff --git a/compiler/vmgen.nim b/compiler/vmgen.nim
index ab2ac8707..092c25a46 100644
--- a/compiler/vmgen.nim
+++ b/compiler/vmgen.nim
@@ -2039,6 +2039,8 @@ proc gen(c: PCtx; n: PNode; dest: var TDest; flags: TGenFlags = {}) =
     genConv(c, n, n.sons[1], dest)
   of nkObjDownConv:
     genConv(c, n, n.sons[0], dest)
+  of nkObjUpConv:
+    genConv(c, n, n.sons[0], dest)
   of nkVarSection, nkLetSection:
     unused(c, n, dest)
     genVarSection(c, n)
author	Arne Döring <arne.doering@gmx.net>	2019-02-28 22:57:57 +0100
committer	Andreas Rumpf <rumpf_a@web.de>	2019-02-28 22:57:57 +0100
commit	1102b8ac6e643c8f8428dd7db0994d26b0c65ea6 (patch)
tree	b08388f89e7867f03e5d59be00db70a6535752dc /compiler
parent	728ff1004a60835c18c44b64830ea08dc805485e (diff)
download	Nim-1102b8ac6e643c8f8428dd7db0994d26b0c65ea6.tar.gz