diff options
Diffstat (limited to 'lib/pure/lexbase.nim')
-rw-r--r--[-rwxr-xr-x] | lib/pure/lexbase.nim | 151 |
1 files changed, 76 insertions, 75 deletions
diff --git a/lib/pure/lexbase.nim b/lib/pure/lexbase.nim index bb207e92a..1efd97b24 100755..100644 --- a/lib/pure/lexbase.nim +++ b/lib/pure/lexbase.nim @@ -1,6 +1,6 @@ # # -# The Nimrod Compiler +# Nim's Runtime Library # (c) Copyright 2009 Andreas Rumpf # # See the file "copying.txt", included in this @@ -12,10 +12,13 @@ ## needs refilling. import - strutils, streams + std/[strutils, streams] + +when defined(nimPreviewSlimSystem): + import std/assertions const - EndOfFile* = '\0' ## end of file marker + EndOfFile* = '\0' ## end of file marker NewLines* = {'\c', '\L'} # Buffer handling: @@ -25,48 +28,22 @@ const # type - TBaseLexer* = object of TObject ## the base lexer. Inherit your lexer from - ## this object. - bufpos*: int ## the current position within the buffer - buf*: cstring ## the buffer itself - bufLen*: int ## length of buffer in characters - input: PStream ## the input stream - LineNumber*: int ## the current line number + BaseLexer* = object of RootObj ## the base lexer. Inherit your lexer from + ## this object. + bufpos*: int ## the current position within the buffer + buf*: string ## the buffer itself + input: Stream ## the input stream + lineNumber*: int ## the current line number sentinel: int - lineStart: int # index of last line start in buffer - fileOpened: bool - -proc open*(L: var TBaseLexer, input: PStream, bufLen: int = 8192) - ## inits the TBaseLexer with a stream to read from + lineStart: int # index of last line start in buffer + offsetBase*: int # use `offsetBase + bufpos` to get the offset + refillChars: set[char] -proc close*(L: var TBaseLexer) +proc close*(L: var BaseLexer) = ## closes the base lexer. This closes `L`'s associated stream too. + close(L.input) -proc getCurrentLine*(L: TBaseLexer, marker: bool = true): string - ## retrieves the current line. - -proc getColNumber*(L: TBaseLexer, pos: int): int - ## retrieves the current column. - -proc HandleCR*(L: var TBaseLexer, pos: int): int - ## Call this if you scanned over '\c' in the buffer; it returns the the - ## position to continue the scanning from. `pos` must be the position - ## of the '\c'. -proc HandleLF*(L: var TBaseLexer, pos: int): int - ## Call this if you scanned over '\L' in the buffer; it returns the the - ## position to continue the scanning from. `pos` must be the position - ## of the '\L'. - -# implementation - -const - chrSize = sizeof(char) - -proc close(L: var TBaseLexer) = - dealloc(L.buf) - L.input.close(L.input) - -proc FillBuffer(L: var TBaseLexer) = +proc fillBuffer(L: var BaseLexer) = var charsRead, toCopy, s: int # all are in characters, # not bytes (in case this @@ -74,23 +51,32 @@ proc FillBuffer(L: var TBaseLexer) = oldBufLen: int # we know here that pos == L.sentinel, but not if this proc # is called the first time by initBaseLexer() - assert(L.sentinel < L.bufLen) - toCopy = L.BufLen - L.sentinel - 1 + assert(L.sentinel + 1 <= L.buf.len) + toCopy = L.buf.len - (L.sentinel + 1) assert(toCopy >= 0) if toCopy > 0: - MoveMem(L.buf, addr(L.buf[L.sentinel + 1]), toCopy * chrSize) # "moveMem" handles overlapping regions - charsRead = L.input.readData(L.input, addr(L.buf[toCopy]), - (L.sentinel + 1) * chrSize) div chrSize + when defined(js) or defined(nimscript): + # nimscript has to be here to avoid compiling other branch (moveMem) + for i in 0 ..< toCopy: + L.buf[i] = L.buf[L.sentinel + 1 + i] + else: + when nimvm: + for i in 0 ..< toCopy: + L.buf[i] = L.buf[L.sentinel + 1 + i] + else: + # "moveMem" handles overlapping regions + moveMem(addr L.buf[0], addr L.buf[L.sentinel + 1], toCopy) + charsRead = L.input.readDataStr(L.buf, toCopy ..< toCopy + L.sentinel + 1) s = toCopy + charsRead if charsRead < L.sentinel + 1: - L.buf[s] = EndOfFile # set end marker + L.buf[s] = EndOfFile # set end marker L.sentinel = s else: # compute sentinel: - dec(s) # BUGFIX (valgrind) + dec(s) # BUGFIX (valgrind) while true: - assert(s < L.bufLen) - while (s >= 0) and not (L.buf[s] in NewLines): Dec(s) + assert(s < L.buf.len) + while s >= 0 and L.buf[s] notin L.refillChars: dec(s) if s >= 0: # we found an appropriate character for a sentinel: L.sentinel = s @@ -98,62 +84,78 @@ proc FillBuffer(L: var TBaseLexer) = else: # rather than to give up here because the line is too long, # double the buffer's size and try again: - oldBufLen = L.BufLen - L.bufLen = L.BufLen * 2 - L.buf = cast[cstring](realloc(L.buf, L.bufLen * chrSize)) - assert(L.bufLen - oldBuflen == oldBufLen) - charsRead = L.input.ReadData(L.input, addr(L.buf[oldBufLen]), - oldBufLen * chrSize) div chrSize + oldBufLen = L.buf.len + L.buf.setLen(L.buf.len * 2) + charsRead = readDataStr(L.input, L.buf, oldBufLen ..< L.buf.len) if charsRead < oldBufLen: L.buf[oldBufLen + charsRead] = EndOfFile L.sentinel = oldBufLen + charsRead break - s = L.bufLen - 1 + s = L.buf.len - 1 -proc fillBaseLexer(L: var TBaseLexer, pos: int): int = +proc fillBaseLexer(L: var BaseLexer, pos: int): int = assert(pos <= L.sentinel) if pos < L.sentinel: - result = pos + 1 # nothing to do + result = pos + 1 # nothing to do else: fillBuffer(L) - L.bufpos = 0 # XXX: is this really correct? + L.offsetBase += pos + L.bufpos = 0 result = 0 - L.lineStart = result -proc HandleCR(L: var TBaseLexer, pos: int): int = +proc handleCR*(L: var BaseLexer, pos: int): int = + ## Call this if you scanned over `'\c'` in the buffer; it returns the + ## position to continue the scanning from. `pos` must be the position + ## of the `'\c'`. assert(L.buf[pos] == '\c') - inc(L.linenumber) + inc(L.lineNumber) result = fillBaseLexer(L, pos) if L.buf[result] == '\L': result = fillBaseLexer(L, result) + L.lineStart = result -proc HandleLF(L: var TBaseLexer, pos: int): int = +proc handleLF*(L: var BaseLexer, pos: int): int = + ## Call this if you scanned over `'\L'` in the buffer; it returns the + ## position to continue the scanning from. `pos` must be the position + ## of the `'\L'`. assert(L.buf[pos] == '\L') - inc(L.linenumber) + inc(L.lineNumber) + result = fillBaseLexer(L, pos) #L.lastNL := result-1; // BUGFIX: was: result; + L.lineStart = result + +proc handleRefillChar*(L: var BaseLexer, pos: int): int = + ## Call this if a terminator character other than a new line is scanned + ## at `pos`; it returns the position to continue the scanning from. + assert(L.buf[pos] in L.refillChars) result = fillBaseLexer(L, pos) #L.lastNL := result-1; // BUGFIX: was: result; -proc skip_UTF_8_BOM(L: var TBaseLexer) = +proc skipUtf8Bom(L: var BaseLexer) = if (L.buf[0] == '\xEF') and (L.buf[1] == '\xBB') and (L.buf[2] == '\xBF'): inc(L.bufpos, 3) inc(L.lineStart, 3) -proc open(L: var TBaseLexer, input: PStream, bufLen: int = 8192) = +proc open*(L: var BaseLexer, input: Stream, bufLen: int = 8192; + refillChars: set[char] = NewLines) = + ## inits the BaseLexer with a stream to read from. assert(bufLen > 0) assert(input != nil) L.input = input L.bufpos = 0 - L.bufLen = bufLen - L.buf = cast[cstring](alloc(bufLen * chrSize)) + L.offsetBase = 0 + L.refillChars = refillChars + L.buf = newString(bufLen) L.sentinel = bufLen - 1 L.lineStart = 0 - L.linenumber = 1 # lines start at 1 + L.lineNumber = 1 # lines start at 1 fillBuffer(L) - skip_UTF_8_BOM(L) + skipUtf8Bom(L) -proc getColNumber(L: TBaseLexer, pos: int): int = +proc getColNumber*(L: BaseLexer, pos: int): int = + ## retrieves the current column. result = abs(pos - L.lineStart) -proc getCurrentLine(L: TBaseLexer, marker: bool = true): string = +proc getCurrentLine*(L: BaseLexer, marker: bool = true): string = + ## retrieves the current line. var i: int result = "" i = L.lineStart @@ -162,5 +164,4 @@ proc getCurrentLine(L: TBaseLexer, marker: bool = true): string = inc(i) add(result, "\n") if marker: - add(result, RepeatChar(getColNumber(L, L.bufpos)) & "^\n") - + add(result, spaces(getColNumber(L, L.bufpos)) & "^\n") |