diff options
Diffstat (limited to 'nim/lexbase.pas')
-rwxr-xr-x | nim/lexbase.pas | 232 |
1 files changed, 0 insertions, 232 deletions
diff --git a/nim/lexbase.pas b/nim/lexbase.pas deleted file mode 100755 index 2b056c04f..000000000 --- a/nim/lexbase.pas +++ /dev/null @@ -1,232 +0,0 @@ -// -// -// The Nimrod Compiler -// (c) Copyright 2008 Andreas Rumpf -// -// See the file "copying.txt", included in this -// distribution, for details about the copyright. -// -unit lexbase; - -// Base Object of a lexer with efficient buffer handling. In fact -// I believe that this is the most efficient method of buffer -// handling that exists! Only at line endings checks are necessary -// if the buffer needs refilling. - -interface - -uses - nsystem, llstream, charsets, strutils; - -{@emit -const - Lrz = ' '; - Apo = ''''; - Tabulator = #9; - ESC = #27; - CR = #13; - FF = #12; - LF = #10; - BEL = #7; - BACKSPACE = #8; - VT = #11; -} - -const - EndOfFile = #0; // end of file marker -{ A little picture makes everything clear :-) - buf: - "Example Text\n ha!" bufLen = 17 - ^pos = 0 ^ sentinel = 12 -} - NewLines = {@set}[CR, LF]; - -type - TBaseLexer = object(NObject) - bufpos: int; - buf: PChar; - bufLen: int; // length of buffer in characters - stream: PLLStream; // we read from this stream - LineNumber: int; // the current line number - // private data: - sentinel: int; - lineStart: int; // index of last line start in buffer - end; - -procedure openBaseLexer(out L: TBaseLexer; - inputstream: PLLStream; - bufLen: int = 8192); - // 8K is a reasonable buffer size - -procedure closeBaseLexer(var L: TBaseLexer); - -function getCurrentLine(const L: TBaseLexer; marker: boolean = true): string; -function getColNumber(const L: TBaseLexer; pos: int): int; - -function HandleCR(var L: TBaseLexer; pos: int): int; -// Call this if you scanned over CR in the buffer; it returns the -// position to continue the scanning from. `pos` must be the position -// of the CR. - -function HandleLF(var L: TBaseLexer; pos: int): int; -// Call this if you scanned over LF in the buffer; it returns the the -// position to continue the scanning from. `pos` must be the position -// of the LF. - -implementation - -const - chrSize = sizeof(char); - -procedure closeBaseLexer(var L: TBaseLexer); -begin - dealloc(L.buf); - LLStreamClose(L.stream); -end; - -{@ignore} -{$ifdef false} -procedure printBuffer(const L: TBaseLexer); -var - i: int; -begin - writeln('____________________________________'); - writeln('sentinel: ', L.sentinel); - writeln('bufLen: ', L.bufLen); - writeln('buf: '); - for i := 0 to L.bufLen-1 do write(L.buf[i]); - writeln(NL + '____________________________________'); -end; -{$endif} -{@emit} - -procedure FillBuffer(var L: TBaseLexer); -var - charsRead, toCopy, s: int; // all are in characters, - // not bytes (in case this - // is not the same) - oldBufLen: int; -begin - // we know here that pos == L.sentinel, but not if this proc - // is called the first time by initBaseLexer() - assert(L.sentinel < L.bufLen); - toCopy := L.BufLen - L.sentinel - 1; - assert(toCopy >= 0); - if toCopy > 0 then - MoveMem(L.buf, addr(L.buf[L.sentinel+1]), toCopy * chrSize); - // "moveMem" handles overlapping regions - charsRead := LLStreamRead(L.stream, addr(L.buf[toCopy]), - (L.sentinel+1) * chrSize) div chrSize; - s := toCopy + charsRead; - if charsRead < L.sentinel+1 then begin - L.buf[s] := EndOfFile; // set end marker - L.sentinel := s - end - else begin - // compute sentinel: - dec(s); // BUGFIX (valgrind) - while true do begin - assert(s < L.bufLen); - while (s >= 0) and not (L.buf[s] in NewLines) do Dec(s); - if s >= 0 then begin - // we found an appropriate character for a sentinel: - L.sentinel := s; - break - end - else begin - // rather than to give up here because the line is too long, - // double the buffer's size and try again: - oldBufLen := L.BufLen; - L.bufLen := L.BufLen * 2; - L.buf := {@cast}PChar(realloc(L.buf, L.bufLen*chrSize)); - assert(L.bufLen - oldBuflen = oldBufLen); - charsRead := LLStreamRead(L.stream, addr(L.buf[oldBufLen]), - oldBufLen*chrSize) div chrSize; - if charsRead < oldBufLen then begin - L.buf[oldBufLen+charsRead] := EndOfFile; - L.sentinel := oldBufLen+charsRead; - break - end; - s := L.bufLen - 1 - end - end - end -end; - -function fillBaseLexer(var L: TBaseLexer; pos: int): int; -begin - assert(pos <= L.sentinel); - if pos < L.sentinel then begin - result := pos+1; // nothing to do - end - else begin - fillBuffer(L); - L.bufpos := 0; // XXX: is this really correct? - result := 0; - end; - L.lineStart := result; -end; - -function HandleCR(var L: TBaseLexer; pos: int): int; -begin - assert(L.buf[pos] = CR); - inc(L.linenumber); - result := fillBaseLexer(L, pos); - if L.buf[result] = LF then begin - result := fillBaseLexer(L, result); - end; - //L.lastNL := result-1; // BUGFIX: was: result; -end; - -function HandleLF(var L: TBaseLexer; pos: int): int; -begin - assert(L.buf[pos] = LF); - inc(L.linenumber); - result := fillBaseLexer(L, pos); - //L.lastNL := result-1; // BUGFIX: was: result; -end; - -procedure skip_UTF_8_BOM(var L: TBaseLexer); -begin - if (L.buf[0] = #239) and (L.buf[1] = #187) and (L.buf[2] = #191) then begin - inc(L.bufpos, 3); - inc(L.lineStart, 3) - end -end; - -procedure openBaseLexer(out L: TBaseLexer; inputstream: PLLStream; - bufLen: int = 8192); -begin - assert(bufLen > 0); - L.bufpos := 0; - L.bufLen := bufLen; - L.buf := {@cast}PChar(alloc(bufLen * chrSize)); - L.sentinel := bufLen-1; - L.lineStart := 0; - L.linenumber := 1; // lines start at 1 - L.stream := inputstream; - fillBuffer(L); - skip_UTF_8_BOM(L); -end; - -function getColNumber(const L: TBaseLexer; pos: int): int; -begin - result := abs(pos - L.lineStart); -end; - -function getCurrentLine(const L: TBaseLexer; marker: boolean = true): string; -var - i: int; -begin - result := ''; - i := L.lineStart; - while not (L.buf[i] in [CR, LF, EndOfFile]) do begin - addChar(result, L.buf[i]); - inc(i) - end; - result := result +{&} NL; - if marker then - result := result +{&} RepeatChar(getColNumber(L, L.bufpos)) +{&} '^' +{&} NL -end; - -end. |