summary refs log tree commit diff stats
path: root/nim/scanner.pas
diff options
context:
space:
mode:
Diffstat (limited to 'nim/scanner.pas')
-rwxr-xr-xnim/scanner.pas1036
1 files changed, 0 insertions, 1036 deletions
diff --git a/nim/scanner.pas b/nim/scanner.pas
deleted file mode 100755
index c03ae9224..000000000
--- a/nim/scanner.pas
+++ /dev/null
@@ -1,1036 +0,0 @@
-//
-//
-//           The Nimrod Compiler
-//        (c) Copyright 2009 Andreas Rumpf
-//
-//    See the file "copying.txt", included in this
-//    distribution, for details about the copyright.
-//
-unit scanner;
-
-// This scanner is handwritten for efficiency. I used an elegant buffering
-// scheme which I have not seen anywhere else:
-// We guarantee that a whole line is in the buffer. Thus only when scanning
-// the \n or \r character we have to check wether we need to read in the next 
-// chunk. (\n or \r already need special handling for incrementing the line
-// counter; choosing both \n and \r allows the scanner to properly read Unix,
-// DOS or Macintosh text files, even when it is not the native format.
-
-interface
-
-{$include 'config.inc'}
-
-uses
-  charsets, nsystem, sysutils, nhashes, options, msgs, strutils, platform,
-  idents, lexbase, llstream, wordrecg;
-
-const
-  MaxLineLength = 80; // lines longer than this lead to a warning
-
-  numChars: TCharSet = ['0'..'9','a'..'z','A'..'Z'];
-  SymChars: TCharSet = ['a'..'z', 'A'..'Z', '0'..'9', #128..#255];
-  SymStartChars: TCharSet = ['a'..'z', 'A'..'Z', #128..#255];
-  OpChars: TCharSet = ['+', '-', '*', '/', '\', '<', '>', '!', '?', '^', '.',
-    '|', '=', '%', '&', '$', '@', '~', #128..#255];
-
-type
-  TTokType = (tkInvalid, tkEof, // order is important here!
-    tkSymbol,
-    // keywords:
-    //[[[cog
-    //from string import split, capitalize
-    //keywords = split(open("data/keywords.txt").read())
-    //idents = ""
-    //strings = ""
-    //i = 1
-    //for k in keywords:
-    //  idents = idents + "tk" + capitalize(k) + ", "
-    //  strings = strings + "'" + k + "', "
-    //  if i % 4 == 0:
-    //    idents = idents + "\n"
-    //    strings = strings + "\n"
-    //  i = i + 1
-    //cog.out(idents)
-    //]]]
-    tkAddr, tkAnd, tkAs, tkAsm, 
-    tkBind, tkBlock, tkBreak, tkCase, 
-    tkCast, tkConst, tkContinue, tkConverter, 
-    tkDiscard, tkDistinct, tkDiv, tkElif, 
-    tkElse, tkEnd, tkEnum, tkExcept, 
-    tkFinally, tkFor, tkFrom, tkGeneric, 
-    tkIf, tkImplies, tkImport, tkIn, 
-    tkInclude, tkIs, tkIsnot, tkIterator, 
-    tkLambda, tkMacro, tkMethod, tkMod, 
-    tkNil, tkNot, tkNotin, tkObject, 
-    tkOf, tkOr, tkOut, tkProc, 
-    tkPtr, tkRaise, tkRef, tkReturn, 
-    tkShl, tkShr, tkTemplate, tkTry, 
-    tkTuple, tkType, tkVar, tkWhen, 
-    tkWhile, tkWith, tkWithout, tkXor, 
-    tkYield, 
-    //[[[end]]]
-    tkIntLit, tkInt8Lit, tkInt16Lit, tkInt32Lit, tkInt64Lit,
-    tkFloatLit, tkFloat32Lit, tkFloat64Lit,
-    tkStrLit, tkRStrLit, tkTripleStrLit, tkCallRStrLit, tkCallTripleStrLit,
-    tkCharLit, tkParLe, tkParRi, tkBracketLe, tkBracketRi, tkCurlyLe, tkCurlyRi,
-    tkBracketDotLe, tkBracketDotRi, // [. and  .]
-    tkCurlyDotLe, tkCurlyDotRi, // {.  and  .}
-    tkParDotLe, tkParDotRi, // (. and .)
-    tkComma, tkSemiColon, tkColon,
-    tkEquals, tkDot, tkDotDot, tkHat, tkOpr,
-    tkComment, tkAccent, tkInd, tkSad, tkDed,
-    // pseudo token types used by the source renderers:
-    tkSpaces, tkInfixOpr, tkPrefixOpr, tkPostfixOpr
-  );
-  TTokTypes = set of TTokType;
-const
-  tokKeywordLow = succ(tkSymbol);
-  tokKeywordHigh = pred(tkIntLit);
-  tokOperators: TTokTypes = {@set}[tkOpr, tkSymbol, tkBracketLe, tkBracketRi,
-    tkIn, tkIs, tkIsNot, tkEquals, tkDot, tkHat, tkNot, tkAnd, tkOr, tkXor,
-    tkShl, tkShr, tkDiv, tkMod, tkNotIn];
-
-  TokTypeToStr: array [TTokType] of string = (
-    'tkInvalid', '[EOF]',
-    'tkSymbol',
-    //[[[cog
-    //cog.out(strings)
-    //]]]
-    'addr', 'and', 'as', 'asm', 
-    'bind', 'block', 'break', 'case', 
-    'cast', 'const', 'continue', 'converter', 
-    'discard', 'distinct', 'div', 'elif', 
-    'else', 'end', 'enum', 'except', 
-    'finally', 'for', 'from', 'generic', 
-    'if', 'implies', 'import', 'in', 
-    'include', 'is', 'isnot', 'iterator', 
-    'lambda', 'macro', 'method', 'mod', 
-    'nil', 'not', 'notin', 'object', 
-    'of', 'or', 'out', 'proc', 
-    'ptr', 'raise', 'ref', 'return', 
-    'shl', 'shr', 'template', 'try', 
-    'tuple', 'type', 'var', 'when', 
-    'while', 'with', 'without', 'xor', 
-    'yield', 
-    //[[[end]]]
-    'tkIntLit', 'tkInt8Lit', 'tkInt16Lit', 'tkInt32Lit', 'tkInt64Lit',
-    'tkFloatLit', 'tkFloat32Lit', 'tkFloat64Lit',
-    'tkStrLit', 'tkRStrLit', 'tkTripleStrLit',
-    'tkCallRStrLit', 'tkCallTripleStrLit',
-    'tkCharLit',
-    '('+'', ')'+'', '['+'', ']'+'', '{'+'', '}'+'',
-    '[.', '.]', '{.', '.}', '(.', '.)', ','+'', ';'+'', ':'+'',
-    '='+'', '.'+'', '..', '^'+'', 'tkOpr',
-    'tkComment', '`'+'', '[new indentation]', '[same indentation]',
-    '[dedentation]',
-    'tkSpaces', 'tkInfixOpr', 'tkPrefixOpr', 'tkPostfixOpr'
-  );
-
-type
-  TNumericalBase = (base10, // base10 is listed as the first element,
-                            // so that it is the correct default value
-                    base2,
-                    base8,
-                    base16);
-  PToken = ^TToken;
-  TToken = object          // a Nimrod token
-    tokType: TTokType;     // the type of the token
-    indent: int;           // the indentation; only valid if tokType = tkIndent
-    ident: PIdent;         // the parsed identifier
-    iNumber: BiggestInt;   // the parsed integer literal
-    fNumber: BiggestFloat; // the parsed floating point literal
-    base: TNumericalBase;  // the numerical base; only valid for int
-                           // or float literals
-    literal: string;       // the parsed (string) literal; and
-                           // documentation comments are here too
-    next: PToken;          // next token; can be used for arbitrary look-ahead
-  end;
-
-  PLexer = ^TLexer;
-  TLexer = object(TBaseLexer)
-    filename: string;
-    indentStack: array of int; // the indentation stack
-    dedent: int;             // counter for DED token generation
-    indentAhead: int;        // if > 0 an indendation has already been read
-                             // this is needed because scanning comments
-                             // needs so much look-ahead
-  end;
-
-var
-  gLinesCompiled: int; // all lines that have been compiled
-
-procedure pushInd(var L: TLexer; indent: int);
-procedure popInd(var L: TLexer);
-
-function isKeyword(kind: TTokType): boolean;
-
-procedure openLexer(out lex: TLexer; const filename: string;
-                    inputstream: PLLStream);
-
-procedure rawGetTok(var L: TLexer; var tok: TToken);
-// reads in the next token into tok and skips it
-
-function getColumn(const L: TLexer): int;
-
-function getLineInfo(const L: TLexer): TLineInfo;
-
-procedure closeLexer(var lex: TLexer);
-
-procedure PrintTok(tok: PToken);
-function tokToStr(tok: PToken): string;
-
-// auxiliary functions:
-procedure lexMessage(const L: TLexer; const msg: TMsgKind;
-                     const arg: string = '');
-
-// the Pascal scanner uses this too:
-procedure fillToken(var L: TToken);
-
-implementation
-
-function isKeyword(kind: TTokType): boolean;
-begin
-  result := (kind >= tokKeywordLow) and (kind <= tokKeywordHigh)
-end;
-
-procedure pushInd(var L: TLexer; indent: int);
-var
-  len: int;
-begin
-  len := length(L.indentStack);
-  setLength(L.indentStack, len+1);
-  if (indent > L.indentStack[len-1]) then
-    L.indentstack[len] := indent
-  else
-    InternalError('pushInd');
-  //writeln('push indent ', indent);
-end;
-
-procedure popInd(var L: TLexer);
-var
-  len: int;
-begin
-  len := length(L.indentStack);
-  setLength(L.indentStack, len-1);
-end;
-
-function findIdent(const L: TLexer; indent: int): boolean;
-var
-  i: int;
-begin
-  for i := length(L.indentStack)-1 downto 0 do
-    if L.indentStack[i] = indent then begin result := true; exit end;
-  result := false
-end;
-
-function tokToStr(tok: PToken): string;
-begin
-  case tok.tokType of
-    tkIntLit..tkInt64Lit:
-      result := toString(tok.iNumber);
-    tkFloatLit..tkFloat64Lit:
-      result := toStringF(tok.fNumber);
-    tkInvalid, tkStrLit..tkCharLit, tkComment:
-      result := tok.literal;
-    tkParLe..tkColon, tkEof, tkInd, tkSad, tkDed, tkAccent:
-      result := tokTypeToStr[tok.tokType];
-    else if (tok.ident <> nil) then
-      result := tok.ident.s
-    else begin
-      InternalError('tokToStr');
-      result := ''
-    end
-  end
-end;
-
-procedure PrintTok(tok: PToken);
-begin
-  write(output, TokTypeToStr[tok.tokType]);
-  write(output, ' '+'');
-  writeln(output, tokToStr(tok))
-end;
-
-// ----------------------------------------------------------------------------
-
-var
-  dummyIdent: PIdent;
-
-procedure fillToken(var L: TToken);
-begin
-  L.TokType := tkInvalid;
-  L.iNumber := 0;
-  L.Indent := 0;
-  L.literal := '';
-  L.fNumber := 0.0;
-  L.base := base10;
-  L.ident := dummyIdent; // this prevents many bugs!
-end;
-
-procedure openLexer(out lex: TLexer; const filename: string;
-                    inputstream: PLLStream);
-begin
-{@ignore}
-  FillChar(lex, sizeof(lex), 0);
-{@emit}
-  openBaseLexer(lex, inputstream);
-{@ignore}
-  setLength(lex.indentStack, 1);
-  lex.indentStack[0] := 0;
-{@emit lex.indentStack := @[0]; }
-  lex.filename := filename;
-  lex.indentAhead := -1;
-end;
-
-procedure closeLexer(var lex: TLexer);
-begin
-  inc(gLinesCompiled, lex.LineNumber);
-  closeBaseLexer(lex);
-end;
-
-function getColumn(const L: TLexer): int;
-begin
-  result := getColNumber(L, L.bufPos)
-end;
-
-function getLineInfo(const L: TLexer): TLineInfo;
-begin
-  result := newLineInfo(L.filename, L.linenumber, getColNumber(L, L.bufpos))
-end;
-
-procedure lexMessage(const L: TLexer; const msg: TMsgKind;
-                     const arg: string = '');
-begin
-  msgs.liMessage(getLineInfo(L), msg, arg)
-end;
-
-procedure lexMessagePos(var L: TLexer; const msg: TMsgKind; pos: int;
-                        const arg: string = '');
-var
-  info: TLineInfo;
-begin
-  info := newLineInfo(L.filename, L.linenumber, pos - L.lineStart);
-  msgs.liMessage(info, msg, arg);
-end;
-
-// ----------------------------------------------------------------------------
-
-procedure matchUnderscoreChars(var L: TLexer; var tok: TToken;
-                               const chars: TCharSet);
-// matches ([chars]_)*
-var
-  pos: int;
-  buf: PChar;
-begin
-  pos := L.bufpos; // use registers for pos, buf
-  buf := L.buf;
-  repeat
-    if buf[pos] in chars then begin
-      addChar(tok.literal, buf[pos]);
-      Inc(pos)
-    end
-    else break;
-    if buf[pos] = '_' then begin
-      addChar(tok.literal, '_');
-      Inc(pos);
-    end;
-  until false;
-  L.bufPos := pos;
-end;
-
-function matchTwoChars(const L: TLexer; first: Char;
-                       const second: TCharSet): Boolean;
-begin
-  result := (L.buf[L.bufpos] = first) and (L.buf[L.bufpos+1] in Second);
-end;
-
-function isFloatLiteral(const s: string): boolean;
-var
-  i: int;
-begin
-  for i := strStart to length(s)+strStart-1 do
-    if s[i] in ['.','e','E'] then begin
-      result := true; exit
-    end;
-  result := false
-end;
-
-function GetNumber(var L: TLexer): TToken;
-var
-  pos, endpos: int;
-  xi: biggestInt;
-begin
-  // get the base:
-  result.tokType := tkIntLit; // int literal until we know better
-  result.literal := '';
-  result.base := base10; // BUGFIX
-  pos := L.bufpos;
-  // make sure the literal is correct for error messages:
-  matchUnderscoreChars(L, result, ['A'..'Z', 'a'..'z', '0'..'9']);
-  if (L.buf[L.bufpos] = '.') and (L.buf[L.bufpos+1] in ['0'..'9']) then begin
-    addChar(result.literal, '.');
-    inc(L.bufpos);
-    //matchUnderscoreChars(L, result, ['A'..'Z', 'a'..'z', '0'..'9'])
-    matchUnderscoreChars(L, result, ['0'..'9']);
-    if L.buf[L.bufpos] in ['e', 'E'] then begin
-      addChar(result.literal, 'e');
-      inc(L.bufpos);
-      if L.buf[L.bufpos] in ['+', '-'] then begin
-        addChar(result.literal, L.buf[L.bufpos]);
-        inc(L.bufpos);
-      end;
-      matchUnderscoreChars(L, result, ['0'..'9']);
-    end
-  end;
-  endpos := L.bufpos;
-  if L.buf[endpos] = '''' then begin
-    //matchUnderscoreChars(L, result, ['''', 'f', 'F', 'i', 'I', '0'..'9']);
-    inc(endpos);
-    L.bufpos := pos; // restore position
-    case L.buf[endpos] of
-      'f', 'F': begin
-        inc(endpos);
-        if (L.buf[endpos] = '6') and (L.buf[endpos+1] = '4') then begin
-          result.tokType := tkFloat64Lit;
-          inc(endpos, 2);
-        end
-        else if (L.buf[endpos] = '3') and (L.buf[endpos+1] = '2') then begin
-          result.tokType := tkFloat32Lit;
-          inc(endpos, 2);
-        end
-        else lexMessage(L, errInvalidNumber, result.literal);
-      end;
-      'i', 'I': begin
-        inc(endpos);
-        if (L.buf[endpos] = '6') and (L.buf[endpos+1] = '4') then begin
-          result.tokType := tkInt64Lit;
-          inc(endpos, 2);
-        end
-        else if (L.buf[endpos] = '3') and (L.buf[endpos+1] = '2') then begin
-          result.tokType := tkInt32Lit;
-          inc(endpos, 2);
-        end
-        else if (L.buf[endpos] = '1') and (L.buf[endpos+1] = '6') then begin
-          result.tokType := tkInt16Lit;
-          inc(endpos, 2);
-        end
-        else if (L.buf[endpos] = '8') then begin
-          result.tokType := tkInt8Lit;
-          inc(endpos);
-        end
-        else lexMessage(L, errInvalidNumber, result.literal);
-      end;
-      else lexMessage(L, errInvalidNumber, result.literal);
-    end
-  end
-  else
-    L.bufpos := pos; // restore position
-
-  try
-    if (L.buf[pos] = '0') and (L.buf[pos+1] in ['x','X','b','B','o','O','c','C'])
-    then begin
-      inc(pos, 2);
-      xi := 0;
-      // it may be a base prefix
-      case L.buf[pos-1] of
-        'b', 'B': begin
-          result.base := base2;
-          while true do begin
-            case L.buf[pos] of
-              'A'..'Z', 'a'..'z', '2'..'9', '.': begin
-                lexMessage(L, errInvalidNumber, result.literal);
-                inc(pos)
-              end;
-              '_': inc(pos);
-              '0', '1': begin
-                xi := shlu(xi, 1) or (ord(L.buf[pos]) - ord('0'));
-                inc(pos);
-              end;
-              else break;
-            end
-          end
-        end;
-        'o', 'c', 'C': begin
-          result.base := base8;
-          while true do begin
-            case L.buf[pos] of
-              'A'..'Z', 'a'..'z', '8'..'9', '.': begin
-                lexMessage(L, errInvalidNumber, result.literal);
-                inc(pos)
-              end;
-              '_': inc(pos);
-              '0'..'7': begin
-                xi := shlu(xi, 3) or (ord(L.buf[pos]) - ord('0'));
-                inc(pos);
-              end;
-              else break;
-            end
-          end
-        end;
-        'O': lexMessage(L, errInvalidNumber, result.literal);
-        'x', 'X': begin
-          result.base := base16;
-          while true do begin
-            case L.buf[pos] of
-              'G'..'Z', 'g'..'z', '.': begin
-                lexMessage(L, errInvalidNumber, result.literal);
-                inc(pos);
-              end;
-              '_': inc(pos);
-              '0'..'9': begin
-                xi := shlu(xi, 4) or (ord(L.buf[pos]) - ord('0'));
-                inc(pos);
-              end;
-              'a'..'f': begin
-                xi := shlu(xi, 4) or (ord(L.buf[pos]) - ord('a') + 10);
-                inc(pos);
-              end;
-              'A'..'F': begin
-                xi := shlu(xi, 4) or (ord(L.buf[pos]) - ord('A') + 10);
-                inc(pos);
-              end;
-              else break;
-            end
-          end
-        end;
-        else InternalError(getLineInfo(L), 'getNumber');
-      end;
-      // now look at the optional type suffix:
-      case result.tokType of
-        tkIntLit, tkInt64Lit:
-          result.iNumber := xi;
-        tkInt8Lit:
-          result.iNumber := biggestInt(int8(toU8(int(xi))));
-        tkInt16Lit:
-          result.iNumber := biggestInt(toU16(int(xi)));
-        tkInt32Lit:
-          result.iNumber := biggestInt(toU32(xi));
-        tkFloat32Lit:
-          result.fNumber := ({@cast}PFloat32(addr(xi)))^;
-          // note: this code is endian neutral!
-          // XXX: Test this on big endian machine!
-        tkFloat64Lit:
-          result.fNumber := ({@cast}PFloat64(addr(xi)))^;
-        else InternalError(getLineInfo(L), 'getNumber');
-      end
-    end
-    else if isFloatLiteral(result.literal)
-         or (result.tokType = tkFloat32Lit)
-         or (result.tokType = tkFloat64Lit) then begin
-      result.fnumber := parseFloat(result.literal);
-      if result.tokType = tkIntLit then result.tokType := tkFloatLit;
-    end
-    else begin
-      result.iNumber := ParseBiggestInt(result.literal);
-      if (result.iNumber < low(int32)) or (result.iNumber > high(int32)) then
-      begin
-        if result.tokType = tkIntLit then result.tokType := tkInt64Lit
-        else if result.tokType <> tkInt64Lit then
-          lexMessage(L, errInvalidNumber, result.literal);
-      end
-    end;
-  except
-    on EInvalidValue do
-      lexMessage(L, errInvalidNumber, result.literal);
-  {@ignore}
-    on sysutils.EIntOverflow do
-      lexMessage(L, errNumberOutOfRange, result.literal);
-  {@emit}
-    on EOverflow do
-      lexMessage(L, errNumberOutOfRange, result.literal);
-    on EOutOfRange do
-      lexMessage(L, errNumberOutOfRange, result.literal);      
-  end;
-  L.bufpos := endpos;
-end;
-
-procedure handleHexChar(var L: TLexer; var xi: int);
-begin
-  case L.buf[L.bufpos] of
-    '0'..'9': begin
-      xi := (xi shl 4) or (ord(L.buf[L.bufpos]) - ord('0'));
-      inc(L.bufpos);
-    end;
-    'a'..'f': begin
-      xi := (xi shl 4) or (ord(L.buf[L.bufpos]) - ord('a') + 10);
-      inc(L.bufpos);
-    end;
-    'A'..'F': begin
-      xi := (xi shl 4) or (ord(L.buf[L.bufpos]) - ord('A') + 10);
-      inc(L.bufpos);
-    end;
-    else begin end // do nothing
-  end
-end;
-
-procedure handleDecChars(var L: TLexer; var xi: int);
-begin
-  while L.buf[L.bufpos] in ['0'..'9'] do begin
-    xi := (xi * 10) + (ord(L.buf[L.bufpos]) - ord('0'));
-    inc(L.bufpos);
-  end;
-end;
-
-procedure getEscapedChar(var L: TLexer; var tok: TToken);
-var
-  xi: int;
-begin
-  inc(L.bufpos); // skip '\'
-  case L.buf[L.bufpos] of
-    'n', 'N': begin
-      if tok.toktype = tkCharLit then
-        lexMessage(L, errNnotAllowedInCharacter);
-      tok.literal := tok.literal +{&} tnl;
-      Inc(L.bufpos);
-    end;
-    'r', 'R', 'c', 'C': begin addChar(tok.literal, CR); Inc(L.bufpos); end;
-    'l', 'L': begin addChar(tok.literal, LF); Inc(L.bufpos); end;
-    'f', 'F': begin addChar(tok.literal, FF); inc(L.bufpos); end;
-    'e', 'E': begin addChar(tok.literal, ESC); Inc(L.bufpos); end;
-    'a', 'A': begin addChar(tok.literal, BEL); Inc(L.bufpos); end;
-    'b', 'B': begin addChar(tok.literal, BACKSPACE); Inc(L.bufpos); end;
-    'v', 'V': begin addChar(tok.literal, VT); Inc(L.bufpos); end;
-    't', 'T': begin addChar(tok.literal, Tabulator); Inc(L.bufpos); end;
-    '''', '"': begin addChar(tok.literal, L.buf[L.bufpos]); Inc(L.bufpos); end;
-    '\': begin addChar(tok.literal, '\'); Inc(L.bufpos) end;
-    'x', 'X': begin
-      inc(L.bufpos);
-      xi := 0;
-      handleHexChar(L, xi);
-      handleHexChar(L, xi);
-      addChar(tok.literal, Chr(xi));
-    end;
-    '0'..'9': begin
-      if matchTwoChars(L, '0', ['0'..'9']) then
-      // this warning will make it easier for newcomers:
-        lexMessage(L, warnOctalEscape);
-      xi := 0;
-      handleDecChars(L, xi);
-      if (xi <= 255) then
-        addChar(tok.literal, Chr(xi))
-      else
-        lexMessage(L, errInvalidCharacterConstant)
-    end
-    else lexMessage(L, errInvalidCharacterConstant)
-  end
-end;
-
-function HandleCRLF(var L: TLexer; pos: int): int;
-begin
-  case L.buf[pos] of
-    CR: begin
-      if getColNumber(L, pos) > MaxLineLength then
-        lexMessagePos(L, hintLineTooLong, pos);
-      result := lexbase.HandleCR(L, pos)
-    end;
-    LF: begin
-      if getColNumber(L, pos) > MaxLineLength then
-        lexMessagePos(L, hintLineTooLong, pos);
-      result := lexbase.HandleLF(L, pos)
-    end;
-    else result := pos
-  end
-end;
-
-procedure getString(var L: TLexer; var tok: TToken; rawMode: Boolean);
-var
-  line, line2, pos: int;
-  c: Char;
-  buf: PChar;
-begin
-  pos := L.bufPos + 1; // skip "
-  buf := L.buf; // put `buf` in a register
-  line := L.linenumber; // save linenumber for better error message
-  if (buf[pos] = '"') and (buf[pos+1] = '"') then begin
-    tok.tokType := tkTripleStrLit;
-    // long string literal:
-    inc(pos, 2); // skip ""
-    // skip leading newline:
-    pos := HandleCRLF(L, pos);
-    buf := L.buf;
-    repeat
-      case buf[pos] of
-        '"': begin
-          if (buf[pos+1] = '"') and (buf[pos+2] = '"') then
-            break;
-          addChar(tok.literal, '"');
-          Inc(pos)
-        end;
-        CR, LF: begin
-          pos := HandleCRLF(L, pos);
-          buf := L.buf;
-          tok.literal := tok.literal +{&} tnl;
-        end;
-        lexbase.EndOfFile: begin
-          line2 := L.linenumber;
-          L.LineNumber := line;
-          lexMessagePos(L, errClosingTripleQuoteExpected, L.lineStart);
-          L.LineNumber := line2;
-          break
-        end
-        else begin
-          addChar(tok.literal, buf[pos]);
-          Inc(pos)
-        end
-      end
-    until false;
-    L.bufpos := pos + 3 // skip the three """
-  end
-  else begin // ordinary string literal
-    if rawMode then tok.tokType := tkRStrLit
-    else tok.tokType := tkStrLit;
-    repeat
-      c := buf[pos];
-      if c = '"' then begin
-        inc(pos); // skip '"'
-        break
-      end;
-      if c in [CR, LF, lexbase.EndOfFile] then begin
-        lexMessage(L, errClosingQuoteExpected);
-        break
-      end;
-      if (c = '\') and not rawMode then begin
-        L.bufPos := pos;
-        getEscapedChar(L, tok);
-        pos := L.bufPos;
-      end
-      else begin
-        addChar(tok.literal, c);
-        Inc(pos)
-      end
-    until false;
-    L.bufpos := pos;
-  end
-end;
-
-procedure getCharacter(var L: TLexer; var tok: TToken);
-var
-  c: Char;
-begin
-  Inc(L.bufpos); // skip '
-  c := L.buf[L.bufpos];
-  case c of
-    #0..Pred(' '), '''': lexMessage(L, errInvalidCharacterConstant);
-    '\': getEscapedChar(L, tok);
-    else begin
-      tok.literal := c + '';
-      Inc(L.bufpos);
-    end
-  end;
-  if L.buf[L.bufpos] <> '''' then lexMessage(L, errMissingFinalQuote);
-  inc(L.bufpos); // skip '
-end;
-
-{@ignore}
-{$ifopt Q+} {$define Q_on} {$Q-} {$endif}
-{$ifopt R+} {$define R_on} {$R-} {$endif}
-{@emit}
-procedure getSymbol(var L: TLexer; var tok: TToken);
-var
-  pos: int;
-  c: Char;
-  buf: pchar;
-  h: THash; // hashing algorithm inlined
-begin
-  h := 0;
-  pos := L.bufpos;
-  buf := L.buf;
-  while true do begin
-    c := buf[pos];
-    case c of
-      'a'..'z', '0'..'9', #128..#255: begin
-        h := h +{%} Ord(c);
-        h := h +{%} h shl 10;
-        h := h xor (h shr 6)
-      end;
-      'A'..'Z': begin
-        c := chr(ord(c) + (ord('a')-ord('A'))); // toLower()
-        h := h +{%} Ord(c);
-        h := h +{%} h shl 10;
-        h := h xor (h shr 6)
-      end;
-      '_': begin end;
-      else break
-    end;
-    Inc(pos)
-  end;
-  h := h +{%} h shl 3;
-  h := h xor (h shr 11);
-  h := h +{%} h shl 15;
-  tok.ident := getIdent(addr(L.buf[L.bufpos]), pos-L.bufpos, h);
-  L.bufpos := pos;
-  if (tok.ident.id < ord(tokKeywordLow)-ord(tkSymbol)) or
-     (tok.ident.id > ord(tokKeywordHigh)-ord(tkSymbol)) then
-    tok.tokType := tkSymbol
-  else
-    tok.tokType := TTokType(tok.ident.id+ord(tkSymbol));
-  if buf[pos] = '"' then begin
-    getString(L, tok, true);
-    if tok.tokType = tkRStrLit then tok.tokType := tkCallRStrLit
-    else tok.tokType := tkCallTripleStrLit
-  end
-end;
-
-procedure getOperator(var L: TLexer; var tok: TToken);
-var
-  pos: int;
-  c: Char;
-  buf: pchar;
-  h: THash; // hashing algorithm inlined
-begin
-  pos := L.bufpos;
-  buf := L.buf;
-  h := 0;
-  while true do begin
-    c := buf[pos];
-    if c in OpChars then begin
-      h := h +{%} Ord(c);
-      h := h +{%} h shl 10;
-      h := h xor (h shr 6)
-    end
-    else break;
-    Inc(pos)
-  end;
-  h := h +{%} h shl 3;
-  h := h xor (h shr 11);
-  h := h +{%} h shl 15;
-  tok.ident := getIdent(addr(L.buf[L.bufpos]), pos-L.bufpos, h);
-  if (tok.ident.id < oprLow) or (tok.ident.id > oprHigh) then
-    tok.tokType := tkOpr
-  else
-    tok.tokType := TTokType(tok.ident.id - oprLow + ord(tkColon));
-  L.bufpos := pos
-end;
-{@ignore}
-{$ifdef Q_on} {$undef Q_on} {$Q+} {$endif}
-{$ifdef R_on} {$undef R_on} {$R+} {$endif}
-{@emit}
-
-procedure handleIndentation(var L: TLexer; var tok: TToken; indent: int);
-var
-  i: int;
-begin
-  tok.indent := indent;
-  i := high(L.indentStack);
-  if indent > L.indentStack[i] then
-    tok.tokType := tkInd
-  else if indent = L.indentStack[i] then
-    tok.tokType := tkSad
-  else begin
-    // check we have the indentation somewhere in the stack:
-    while (i >= 0) and (indent <> L.indentStack[i]) do begin
-      dec(i);
-      inc(L.dedent);
-    end;
-    dec(L.dedent);
-    tok.tokType := tkDed;
-    if i < 0 then begin
-      tok.tokType := tkSad; // for the parser it is better as SAD
-      lexMessage(L, errInvalidIndentation);
-    end
-  end
-end;
-
-procedure scanComment(var L: TLexer; var tok: TToken);
-var
-  buf: PChar;
-  pos, col: int;
-  indent: int;
-begin
-  pos := L.bufpos;
-  buf := L.buf;
-  // a comment ends if the next line does not start with the # on the same
-  // column after only whitespace
-  tok.tokType := tkComment;
-  col := getColNumber(L, pos);
-  while true do begin
-    while not (buf[pos] in [CR, LF, lexbase.EndOfFile]) do begin
-      addChar(tok.literal, buf[pos]); inc(pos);
-    end;
-    pos := handleCRLF(L, pos);
-    buf := L.buf;
-    indent := 0;
-    while buf[pos] = ' ' do begin inc(pos); inc(indent) end;
-    if (buf[pos] = '#') and (col = indent) then begin
-      tok.literal := tok.literal +{&} nl;
-    end
-    else begin
-      if buf[pos] > ' ' then begin
-        L.indentAhead := indent;
-        inc(L.dedent)
-      end;
-      break
-    end
-  end;
-  L.bufpos := pos;
-end;
-
-procedure skip(var L: TLexer; var tok: TToken);
-var
-  buf: PChar;
-  indent, pos: int;
-begin
-  pos := L.bufpos;
-  buf := L.buf;
-  repeat
-    case buf[pos] of
-      ' ': Inc(pos);
-      Tabulator: begin
-        lexMessagePos(L, errTabulatorsAreNotAllowed, pos);
-        inc(pos); // BUGFIX
-      end;
-      // newline is special:
-      CR, LF: begin
-        pos := HandleCRLF(L, pos);
-        buf := L.buf;
-        indent := 0;
-        while buf[pos] = ' ' do begin
-          Inc(pos); Inc(indent)
-        end;
-        if (buf[pos] > ' ') then begin
-          handleIndentation(L, tok, indent);
-          break;
-        end
-      end;
-      else break // EndOfFile also leaves the loop
-    end
-  until false;
-  L.bufpos := pos;
-end;
-
-procedure rawGetTok(var L: TLexer; var tok: TToken);
-var
-  c: Char;
-begin
-  fillToken(tok);
-  if L.dedent > 0 then begin
-    dec(L.dedent);
-    if L.indentAhead >= 0 then begin
-      handleIndentation(L, tok, L.indentAhead);
-      L.indentAhead := -1;
-    end
-    else
-      tok.tokType := tkDed;
-    exit;
-  end;
-  // Skip whitespace, comments:
-  skip(L, tok); // skip
-  // got an documentation comment or tkIndent, return that:
-  if tok.toktype <> tkInvalid then exit;
-
-  c := L.buf[L.bufpos];
-  if c in SymStartChars - ['r', 'R', 'l'] then // common case first
-    getSymbol(L, tok)
-  else if c in ['0'..'9'] then
-    tok := getNumber(L)
-  else begin
-    case c of
-      '#': scanComment(L, tok);
-      ':': begin
-        tok.tokType := tkColon;
-        inc(L.bufpos);
-      end;
-      ',': begin
-        tok.toktype := tkComma;
-        Inc(L.bufpos)
-      end;
-      'l': begin
-        // if we parsed exactly one character and its a small L (l), this
-        // is treated as a warning because it may be confused with the number 1
-        if not (L.buf[L.bufpos+1] in (SymChars+['_'])) then
-          lexMessage(L, warnSmallLshouldNotBeUsed);
-        getSymbol(L, tok);
-      end;
-      'r', 'R': begin
-        if L.buf[L.bufPos+1] = '"' then begin
-          Inc(L.bufPos);
-          getString(L, tok, true);
-        end
-        else getSymbol(L, tok);
-      end;
-      '(': begin
-        Inc(L.bufpos);
-        if (L.buf[L.bufPos] = '.')
-        and (L.buf[L.bufPos+1] <> '.') then begin
-          tok.toktype := tkParDotLe;
-          Inc(L.bufpos);
-        end
-        else
-          tok.toktype := tkParLe;
-      end;
-      ')': begin
-        tok.toktype := tkParRi;
-        Inc(L.bufpos)
-      end;
-      '[': begin
-        Inc(L.bufpos);
-        if (L.buf[L.bufPos] = '.')
-        and (L.buf[L.bufPos+1] <> '.') then begin
-          tok.toktype := tkBracketDotLe;
-          Inc(L.bufpos);
-        end
-        else
-          tok.toktype := tkBracketLe;
-      end;
-      ']': begin
-        tok.toktype := tkBracketRi;
-        Inc(L.bufpos)
-      end;
-      '.': begin
-        if L.buf[L.bufPos+1] = ']' then begin
-          tok.tokType := tkBracketDotRi;
-          Inc(L.bufpos, 2);
-        end
-        else if L.buf[L.bufPos+1] = '}' then begin
-          tok.tokType := tkCurlyDotRi;
-          Inc(L.bufpos, 2);
-        end
-        else if L.buf[L.bufPos+1] = ')' then begin
-          tok.tokType := tkParDotRi;
-          Inc(L.bufpos, 2);
-        end
-        else
-          getOperator(L, tok)
-      end;
-      '{': begin
-        Inc(L.bufpos);
-        if (L.buf[L.bufPos] = '.')
-        and (L.buf[L.bufPos+1] <> '.') then begin
-          tok.toktype := tkCurlyDotLe;
-          Inc(L.bufpos);
-        end
-        else
-          tok.toktype := tkCurlyLe;
-      end;
-      '}': begin
-        tok.toktype := tkCurlyRi;
-        Inc(L.bufpos)
-      end;
-      ';': begin
-        tok.toktype := tkSemiColon;
-        Inc(L.bufpos)
-      end;
-      '`': begin
-        tok.tokType := tkAccent;
-        Inc(L.bufpos);
-      end;
-      '"': getString(L, tok, false);
-      '''': begin
-        getCharacter(L, tok);
-        tok.tokType := tkCharLit;
-      end;
-      lexbase.EndOfFile: tok.toktype := tkEof;
-      else if c in OpChars then
-        getOperator(L, tok)
-      else begin
-        tok.literal := c + '';
-        tok.tokType := tkInvalid;
-        lexMessage(L, errInvalidToken, c +{&} ' (\' +{&} toString(ord(c)) + ')');
-        Inc(L.bufpos);
-      end
-    end
-  end
-end;
-
-initialization
-  dummyIdent := getIdent('');
-end.