diff options
Diffstat (limited to 'rod/lexbase.nim')
-rw-r--r-- | rod/lexbase.nim | 170 |
1 files changed, 0 insertions, 170 deletions
#
#
#           The Nimrod Compiler
#        (c) Copyright 2010 Andreas Rumpf
#
#    See the file "copying.txt", included in this
#    distribution, for details about the copyright.
#

# Base object of a lexer with efficient buffer handling. The buffered
# data always ends in a sentinel character (a line ending or the end of
# file marker), so a scanner only has to check whether the buffer needs
# refilling when it reaches a line ending. The original author believed
# this to be the most efficient method of buffer handling that exists.

import
  llstream, strutils

# Named characters, listed in ASCII order.
const
  BEL* = '\x07'
  BACKSPACE* = '\x08'
  Tabulator* = '\x09'
  LF* = '\x0A'
  VT* = '\x0B'
  FF* = '\x0C'
  CR* = '\x0D'
  ESC* = '\x1B'
  Lrz* = ' '
  Apo* = '\''

const
  EndOfFile* = '\0'           # end of file marker
  # A little picture makes everything clear :-)
  #  buf:
  #  "Example Text\n ha!"   bufLen = 17
  #   ^pos = 0    ^ sentinel = 12
  #
  NewLines* = {CR, LF}

type
  TBaseLexer* = object of TObject
    bufpos*: int              # current position within `buf`
    buf*: cstring             # the character buffer itself
    bufLen*: int              # length of buffer in characters
    stream*: PLLStream        # we read from this stream
    LineNumber*: int          # the current line number (starts at 1)
    # private data:
    sentinel*: int            # index in `buf` of the sentinel character
    lineStart*: int           # index of last line start in buffer


proc openBaseLexer*(L: var TBaseLexer, inputstream: PLLStream,
                    bufLen: int = 8192)
  # Initializes `L` to read from `inputstream`; 8K is a reasonable
  # buffer size.
proc closeBaseLexer*(L: var TBaseLexer)
  # Frees the buffer and closes the stream.
proc getCurrentLine*(L: TBaseLexer, marker: bool = true): string
  # Returns the text of the current line; with `marker` a second line
  # with a '^' under the current column is appended.
proc getColNumber*(L: TBaseLexer, pos: int): int
  # Returns the distance of `pos` from the start of the current line.
proc HandleCR*(L: var TBaseLexer, pos: int): int
  # Call this if you scanned over CR in the buffer; it returns the
  # position to continue the scanning from. `pos` must be the position
  # of the CR.
proc HandleLF*(L: var TBaseLexer, pos: int): int
  # Call this if you scanned over LF in the buffer; it returns the
  # position to continue the scanning from. `pos` must be the position
  # of the LF.
# implementation

# All buffer bookkeeping below is done in characters; multiplying by
# chrSize yields the byte counts that the memory and stream routines
# take (in case a character is not a single byte).
const
  chrSize = sizeof(char)

# Frees the buffer allocated by openBaseLexer and closes the input
# stream.
proc closeBaseLexer(L: var TBaseLexer) =
  dealloc(L.buf)
  L.buf = nil                 # do not keep a dangling pointer around
  LLStreamClose(L.stream)

# Refills the buffer: the characters after the sentinel are moved to the
# front of the buffer, the freed space is filled from the stream and a
# new sentinel is determined. The sentinel is either the EndOfFile marker
# (when the stream delivered less than requested) or the last CR/LF in
# the buffer. If the buffer contains no line ending at all, the buffer
# is doubled until one is found or the stream is exhausted.
proc FillBuffer(L: var TBaseLexer) =
  var
    charsRead, toCopy, s: int
    oldBufLen: int
  # we know here that pos == L.sentinel, but not if this proc
  # is called the first time by openBaseLexer()
  assert(L.sentinel < L.bufLen)
  # number of not yet scanned characters behind the sentinel:
  toCopy = L.bufLen - L.sentinel - 1
  assert(toCopy >= 0)
  if toCopy > 0:
    MoveMem(L.buf, addr(L.buf[L.sentinel + 1]), toCopy * chrSize)
    # "moveMem" handles overlapping regions
  # the sentinel + 1 characters in front have been consumed; read as many
  # new ones behind the characters that were kept:
  charsRead = LLStreamRead(L.stream, addr(L.buf[toCopy]),
                           (L.sentinel + 1) * chrSize) div chrSize
  s = toCopy + charsRead
  if charsRead < L.sentinel + 1:
    # the stream is exhausted: the end marker becomes the sentinel
    L.buf[s] = EndOfFile
    L.sentinel = s
  else:
    # compute sentinel:
    # the buffer is completely filled here, so `s` equals L.bufLen;
    # step back to the last valid index (BUGFIX found with valgrind)
    dec(s)
    while true:
      assert(s < L.bufLen)
      # search backwards for the last line ending in the buffer
      while (s >= 0) and not (L.buf[s] in NewLines): dec(s)
      if s >= 0:
        # we found an appropriate character for a sentinel:
        L.sentinel = s
        break
      else:
        # rather than to give up here because the line is too long,
        # double the buffer's size and try again:
        oldBufLen = L.bufLen
        L.bufLen = L.bufLen * 2
        L.buf = cast[cstring](realloc(L.buf, L.bufLen * chrSize))
        assert(L.bufLen - oldBufLen == oldBufLen)
        # fill the newly gained second half of the buffer
        charsRead = LLStreamRead(L.stream, addr(L.buf[oldBufLen]),
                                 oldBufLen * chrSize) div chrSize
        if charsRead < oldBufLen:
          L.buf[oldBufLen + charsRead] = EndOfFile
          L.sentinel = oldBufLen + charsRead
          break
        s = L.bufLen - 1

# Returns the position that follows `pos` and records it as the start of
# the current line. If `pos` is the sentinel the buffer is refilled
# first and scanning continues at index 0.
proc fillBaseLexer(L: var TBaseLexer, pos: int): int =
  assert(pos <= L.sentinel)
  if pos < L.sentinel:
    result = pos + 1          # nothing to do
  else:
    FillBuffer(L)
    L.bufpos = 0              # XXX: is this really correct?
    result = 0
  L.lineStart = result

# Call this if you scanned over CR in the buffer; `pos` must be the
# position of the CR. An LF that directly follows the CR is skipped too,
# so that CR LF counts as a single line ending. Returns the position to
# continue the scanning from.
proc HandleCR(L: var TBaseLexer, pos: int): int =
  assert(L.buf[pos] == CR)
  inc(L.LineNumber)
  result = fillBaseLexer(L, pos)
  if L.buf[result] == LF:
    result = fillBaseLexer(L, result)

# Call this if you scanned over LF in the buffer; `pos` must be the
# position of the LF. Returns the position to continue the scanning
# from.
proc HandleLF(L: var TBaseLexer, pos: int): int =
  assert(L.buf[pos] == LF)
  inc(L.LineNumber)
  result = fillBaseLexer(L, pos)

# Skips a UTF-8 byte order mark (EF BB BF) at the start of the buffer by
# advancing both the buffer position and the line start behind it.
proc skip_UTF_8_BOM(L: var TBaseLexer) =
  if (L.buf[0] == '\xEF') and (L.buf[1] == '\xBB') and (L.buf[2] == '\xBF'):
    inc(L.bufpos, 3)
    inc(L.lineStart, 3)

# Initializes `L` for reading from `inputstream`: allocates a buffer of
# `bufLen` characters, fills it for the first time and skips a leading
# UTF-8 byte order mark.
proc openBaseLexer(L: var TBaseLexer, inputstream: PLLStream, bufLen = 8192) =
  assert(bufLen > 0)
  L.bufpos = 0
  L.bufLen = bufLen
  L.buf = cast[cstring](alloc(bufLen * chrSize))
  # pretend that the whole (still empty) buffer has been consumed, so
  # that FillBuffer reads `bufLen` characters and moves nothing
  L.sentinel = bufLen - 1
  L.lineStart = 0
  L.LineNumber = 1            # lines start at 1
  L.stream = inputstream
  FillBuffer(L)
  skip_UTF_8_BOM(L)

# Returns the distance between `pos` and the start of the current line.
proc getColNumber(L: TBaseLexer, pos: int): int =
  result = abs(pos - L.lineStart)

# Returns the current line (from the line start up to, but excluding,
# the next CR, LF or EndOfFile) followed by a newline. If `marker` is
# true, a second line is appended that places a '^' under the column of
# the current buffer position.
proc getCurrentLine(L: TBaseLexer, marker: bool = true): string =
  result = ""
  var i = L.lineStart
  while not (L.buf[i] in {CR, LF, EndOfFile}):
    add(result, L.buf[i])
    inc(i)
  add(result, "\n")
  if marker:
    add(result, RepeatChar(getColNumber(L, L.bufpos)))
    add(result, '^')
    add(result, "\n")