summary refs log tree commit diff stats
path: root/lib/pure/lexbase.nim
diff options
context:
space:
mode:
Diffstat (limited to 'lib/pure/lexbase.nim')
-rw-r--r--lib/pure/lexbase.nim134
1 files changed, 66 insertions, 68 deletions
diff --git a/lib/pure/lexbase.nim b/lib/pure/lexbase.nim
index 23a87d9f8..1efd97b24 100644
--- a/lib/pure/lexbase.nim
+++ b/lib/pure/lexbase.nim
@@ -1,6 +1,6 @@
 #
 #
-#           The Nim Compiler
+#            Nim's Runtime Library
 #        (c) Copyright 2009 Andreas Rumpf
 #
 #    See the file "copying.txt", included in this
@@ -12,10 +12,13 @@
 ## needs refilling.
 
 import
-  strutils, streams
+  std/[strutils, streams]
+
+when defined(nimPreviewSlimSystem):
+  import std/assertions
 
 const
-  EndOfFile* = '\0'           ## end of file marker
+  EndOfFile* = '\0' ## end of file marker
   NewLines* = {'\c', '\L'}
 
 # Buffer handling:
@@ -27,45 +30,17 @@ const
 type
   BaseLexer* = object of RootObj ## the base lexer. Inherit your lexer from
                                  ## this object.
-    bufpos*: int              ## the current position within the buffer
-    buf*: cstring             ## the buffer itself
-    bufLen*: int              ## length of buffer in characters
-    input: Stream            ## the input stream
-    lineNumber*: int          ## the current line number
+    bufpos*: int                 ## the current position within the buffer
+    buf*: string                 ## the buffer itself
+    input: Stream                ## the input stream
+    lineNumber*: int             ## the current line number
     sentinel: int
-    lineStart: int            # index of last line start in buffer
-    fileOpened: bool
-
-{.deprecated: [TBaseLexer: BaseLexer].}
+    lineStart: int               # index of last line start in buffer
+    offsetBase*: int             # use `offsetBase + bufpos` to get the offset
+    refillChars: set[char]
 
-proc open*(L: var BaseLexer, input: Stream, bufLen: int = 8192)
-  ## inits the TBaseLexer with a stream to read from
-
-proc close*(L: var BaseLexer)
+proc close*(L: var BaseLexer) =
   ## closes the base lexer. This closes `L`'s associated stream too.
-
-proc getCurrentLine*(L: BaseLexer, marker: bool = true): string
-  ## retrieves the current line.
-
-proc getColNumber*(L: BaseLexer, pos: int): int
-  ## retrieves the current column.
-
-proc handleCR*(L: var BaseLexer, pos: int): int
-  ## Call this if you scanned over '\c' in the buffer; it returns the the
-  ## position to continue the scanning from. `pos` must be the position
-  ## of the '\c'.
-proc handleLF*(L: var BaseLexer, pos: int): int
-  ## Call this if you scanned over '\L' in the buffer; it returns the the
-  ## position to continue the scanning from. `pos` must be the position
-  ## of the '\L'.
-
-# implementation
-
-const
-  chrSize = sizeof(char)
-
-proc close(L: var BaseLexer) =
-  dealloc(L.buf)
   close(L.input)
 
 proc fillBuffer(L: var BaseLexer) =
@@ -76,24 +51,32 @@ proc fillBuffer(L: var BaseLexer) =
     oldBufLen: int
   # we know here that pos == L.sentinel, but not if this proc
   # is called the first time by initBaseLexer()
-  assert(L.sentinel < L.bufLen)
-  toCopy = L.bufLen - L.sentinel - 1
+  assert(L.sentinel + 1 <= L.buf.len)
+  toCopy = L.buf.len - (L.sentinel + 1)
   assert(toCopy >= 0)
   if toCopy > 0:
-    moveMem(L.buf, addr(L.buf[L.sentinel + 1]), toCopy * chrSize) 
-    # "moveMem" handles overlapping regions
-  charsRead = readData(L.input, addr(L.buf[toCopy]),
-                       (L.sentinel + 1) * chrSize) div chrSize
+    when defined(js) or defined(nimscript):
+      # nimscript has to be here to avoid compiling other branch (moveMem)
+      for i in 0 ..< toCopy:
+        L.buf[i] = L.buf[L.sentinel + 1 + i]
+    else:
+      when nimvm:
+        for i in 0 ..< toCopy:
+          L.buf[i] = L.buf[L.sentinel + 1 + i]
+      else:
+        # "moveMem" handles overlapping regions
+        moveMem(addr L.buf[0], addr L.buf[L.sentinel + 1], toCopy)
+  charsRead = L.input.readDataStr(L.buf, toCopy ..< toCopy + L.sentinel + 1)
   s = toCopy + charsRead
   if charsRead < L.sentinel + 1:
-    L.buf[s] = EndOfFile      # set end marker
+    L.buf[s] = EndOfFile # set end marker
     L.sentinel = s
   else:
     # compute sentinel:
-    dec(s)                    # BUGFIX (valgrind)
+    dec(s) # BUGFIX (valgrind)
     while true:
-      assert(s < L.bufLen)
-      while (s >= 0) and not (L.buf[s] in NewLines): dec(s)
+      assert(s < L.buf.len)
+      while s >= 0 and L.buf[s] notin L.refillChars: dec(s)
       if s >= 0:
         # we found an appropriate character for a sentinel:
         L.sentinel = s
@@ -101,62 +84,78 @@ proc fillBuffer(L: var BaseLexer) =
       else:
         # rather than to give up here because the line is too long,
         # double the buffer's size and try again:
-        oldBufLen = L.bufLen
-        L.bufLen = L.bufLen * 2
-        L.buf = cast[cstring](realloc(L.buf, L.bufLen * chrSize))
-        assert(L.bufLen - oldBufLen == oldBufLen)
-        charsRead = readData(L.input, addr(L.buf[oldBufLen]),
-                             oldBufLen * chrSize) div chrSize
+        oldBufLen = L.buf.len
+        L.buf.setLen(L.buf.len * 2)
+        charsRead = readDataStr(L.input, L.buf, oldBufLen ..< L.buf.len)
         if charsRead < oldBufLen:
           L.buf[oldBufLen + charsRead] = EndOfFile
           L.sentinel = oldBufLen + charsRead
           break
-        s = L.bufLen - 1
+        s = L.buf.len - 1
 
 proc fillBaseLexer(L: var BaseLexer, pos: int): int =
   assert(pos <= L.sentinel)
   if pos < L.sentinel:
-    result = pos + 1          # nothing to do
+    result = pos + 1 # nothing to do
   else:
     fillBuffer(L)
-    L.bufpos = 0              # XXX: is this really correct?
+    L.offsetBase += pos
+    L.bufpos = 0
     result = 0
-  L.lineStart = result
 
-proc handleCR(L: var BaseLexer, pos: int): int =
+proc handleCR*(L: var BaseLexer, pos: int): int =
+  ## Call this if you scanned over `'\c'` in the buffer; it returns the
+  ## position to continue the scanning from. `pos` must be the position
+  ## of the `'\c'`.
   assert(L.buf[pos] == '\c')
   inc(L.lineNumber)
   result = fillBaseLexer(L, pos)
   if L.buf[result] == '\L':
     result = fillBaseLexer(L, result)
+  L.lineStart = result
 
-proc handleLF(L: var BaseLexer, pos: int): int =
+proc handleLF*(L: var BaseLexer, pos: int): int =
+  ## Call this if you scanned over `'\L'` in the buffer; it returns the
+  ## position to continue the scanning from. `pos` must be the position
+  ## of the `'\L'`.
   assert(L.buf[pos] == '\L')
   inc(L.lineNumber)
   result = fillBaseLexer(L, pos) #L.lastNL := result-1; // BUGFIX: was: result;
+  L.lineStart = result
+
+proc handleRefillChar*(L: var BaseLexer, pos: int): int =
+  ## Call this if a terminator character other than a new line is scanned
+  ## at `pos`; it returns the position to continue the scanning from.
+  assert(L.buf[pos] in L.refillChars)
+  result = fillBaseLexer(L, pos) #L.lastNL := result-1; // BUGFIX: was: result;
 
 proc skipUtf8Bom(L: var BaseLexer) =
   if (L.buf[0] == '\xEF') and (L.buf[1] == '\xBB') and (L.buf[2] == '\xBF'):
     inc(L.bufpos, 3)
     inc(L.lineStart, 3)
 
-proc open(L: var BaseLexer, input: Stream, bufLen: int = 8192) =
+proc open*(L: var BaseLexer, input: Stream, bufLen: int = 8192;
+           refillChars: set[char] = NewLines) =
+  ## inits the BaseLexer with a stream to read from.
   assert(bufLen > 0)
   assert(input != nil)
   L.input = input
   L.bufpos = 0
-  L.bufLen = bufLen
-  L.buf = cast[cstring](alloc(bufLen * chrSize))
+  L.offsetBase = 0
+  L.refillChars = refillChars
+  L.buf = newString(bufLen)
   L.sentinel = bufLen - 1
   L.lineStart = 0
-  L.lineNumber = 1            # lines start at 1
+  L.lineNumber = 1 # lines start at 1
   fillBuffer(L)
   skipUtf8Bom(L)
 
-proc getColNumber(L: BaseLexer, pos: int): int =
+proc getColNumber*(L: BaseLexer, pos: int): int =
+  ## retrieves the current column.
   result = abs(pos - L.lineStart)
 
-proc getCurrentLine(L: BaseLexer, marker: bool = true): string =
+proc getCurrentLine*(L: BaseLexer, marker: bool = true): string =
+  ## retrieves the current line.
   var i: int
   result = ""
   i = L.lineStart
@@ -166,4 +165,3 @@ proc getCurrentLine(L: BaseLexer, marker: bool = true): string =
   add(result, "\n")
   if marker:
     add(result, spaces(getColNumber(L, L.bufpos)) & "^\n")
-