summary refs log tree commit diff stats
path: root/compiler/pas2nim/paslex.nim
blob: 598a27158c7fb9efc2d8f22ddffc52da7d0a4a9b (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159pre { line-height: 125%; }
td.linenos .normal { color: inherit; background-color: transparent; padding-left: 5px; padding-right: 5px; }
span.linenos { color: inherit; background-color: transparent; padding-left: 5px; padding-right: 5px; }
td.linenos .special { color: #000000; background-color: #ffffc0; padding-left: 5px; padding-right: 5px; }
span.linenos.special { color: #000000; background-color: #ffffc0; padding-left: 5px; padding-right: 5px; }
.highlight .hll { background-color: #ffffcc }
.highlight .c { color: #888888 } /* Comment */
.highlight .err { color: #a61717; background-color: #e3d2d2 } /* Error */
.highlight .k { color: #008800; font-weight: bold } /* Keyword */
.highlight .ch { color: #888888 } /* Comment.Hashbang */
.highlight .cm { color: #888888 } /* Comment.Multiline */
.highlight .cp { color: #cc0000; font-weight: bold } /* Comment.Preproc */
.highlight .cpf { color: #888888 } /* Comment.PreprocFile */
.highlight .c1 { color: #888888 } /* Comment.Single */
.highlight .cs { color: #cc0000; font-weight: bold; background-color: #fff0f0 } /* Comment.Special */
.highlight .gd { color: #000000; background-color: #ffdddd } /* Generic.Deleted */
.highlight .ge { font-style: italic } /* Generic.Emph */
.highlight .ges { font-weight: bold; font-style: italic } /* Generic.EmphStrong */
.highlight .gr { color: #aa0000 } /* Generic.Error */
.highlight .gh { color: #333333 } /* Generic.Heading */
.highlight .gi { color: #000000; background-color: #ddffdd } /* Generic.Inserted */
.highlight .go { color: #888888 } /* Generic.Output */
.highlight .gp { color: #555555 } /* Generic.Prompt */
.highlight .gs { font-weight: bold } /* Generic.Strong */
.highlight .gu { color: #666666 } /* Generic.Subheading */
.highlight .gt { color: #aa0000 } /* Generic.Traceback */
.highlight .kc { color: #008800; font-weight: bold } /* Keyword.Constant */
.highlight .kd { color: #008800; font-weight: bold } /* Keyword.Declaration */
.highlight .kn { color: #008800; font-weight: bold } /* Keyword.Namespace */
.highlight .kp { color: #008800 } /* Keyword.Pseudo */
.highlight .kr { color: #008800; font-weight: bold } /* Keyword.Reserved */
.highlight .kt { color: #888888; font-weight: bold } /* Keyword.Type */
.highlight .m { color: #0000DD; font-weight: bold } /* Literal.Number */
.highlight .s { color: #dd2200; background-color: #fff0f0 } /* Literal.String */
.highlight .na { color: #336699 } /* Name.Attribute */
.highlight .nb { color: #003388 } /* Name.Builtin */
.highlight .nc { color: #bb0066; font-weight: bold } /* Name.Class */
.highlight .no { color: #003366; font-weight: bold } /* Name.Constant */
.highlight .nd { color: #555555 } /* Name.Decorator */
.highlight .ne { color: #bb0066; font-weight: bold } /* Name.Exception */
.highlight .nf { color: #0066bb; font-weight: bold } /* Name.Function */
.highlight .nl { color: #336699; font-style: italic } /* Name.Label */
.highlight .nn { color: #bb0066; font-weight: bold } /* Name.Namespace */
.highlight .py { color: #336699; font-weight: bold } /* Name.Property */
.highlight .nt { color: #bb0066; font-weight: bold } /* Name.Tag */
.highlight .nv { color: #336699 } /* Name.Variable */
.highlight .ow { color: #008800 } /* Operator.Word */
.highlight .w { color: #bbbbbb } /* Text.Whitespace */
.highlight .mb { color: #0000DD; font-weight: bold } /* Literal.Number.Bin */
.highlight .mf { color: #0000DD; font-weight: bold } /* Literal.Number.Float */
.highlight .mh { color: #0000DD; font-weight: bold } /* Literal.Number.Hex */
.highlight .mi { color: #0000DD; font-weight: bold } /* Literal.Number.Integer */
.highlight .mo { color: #0000DD; font-weight: bold } /* Literal.Number.Oct */
.highlight .sa { color: #dd2200; background-color: #fff0f0 } /* Literal.String.Affix */
.highlight .sb { color: #dd2200; background-color: #fff0f0 } /* Literal.String.Backtick */
.highlight .sc { color: #dd2200; background-color: #fff0f0 } /* Literal.String.Char */
.highlight .dl { color: #dd2200; background-color: #fff0f0 } /* Literal.String.Delimiter */
.highlight .sd { color: #dd2200; background-color: #fff0f0 } /* Literal.String.Doc */
.highlight .s2 { color: #dd2200; background-color: #fff0f0 } /* Literal.String.Double */
.highlight .se { color: #0044dd; background-color: #fff0f0 } /* Literal.String.Escape */
.highlight .sh { color: #dd2200; background-color: #fff0f0 } /* Literal.String.Heredoc */
.highlight .si { color: #3333bb; background-color: #fff0f0 } /* Literal.String.Interpol */
.highlight .sx { color: #22bb22; background-color: #f0fff0 } /* Literal.String.Other */
.highlight .sr { color: #008800; background-color: #fff0ff } /* Literal.String.Regex */
.highlight .s1 { color: #dd2200; background-color: #fff0f0 } /* Literal.String.Single */
.highlight .ss { color: #aa6600; background-color: #fff0f0 } /* Literal.String.Symbol */
.highlight .bp { color: #003388 } /* Name.Builtin.Pseudo */
.highlight .fm { color: #0066bb; font-weight: bold } /* Name.Function.Magic */
.highlight .vc { color: #336699 } /* Name.Variable.Class */
.highlight .vg { color: #dd7700 } /* Name.Variable.Global */
.highlight .vi { color: #3333bb } /* Name.Variable.Instance */
.highlight .vm { color: #336699 } /* Name.Variable.Magic */
.highlight .il { color: #0000DD; font-weight: bold } /* Literal.Number.Integer.Long */
#The MIME name of this charset. 
Mcp737

#Name as a Display Charset (used on Options screen)
ODosGreek (cp737)

#
#    Name:     cp737_DOSGreek to Unicode table
#    Unicode version: 2.0
#    Table version: 2.00
#    Table format:  Format A
#    Date:          04/24/96
#    Authors:       Lori Brownell <loribr@microsoft.com>
#                   K.D. Chang    <a-kchang@microsoft.com>
#    General notes: none
#
#    Format: Three tab-separated columns
#        Column #1 is the cp737_DOSGreek code (in hex)
#        Column #2 is the Unicode (in hex as 0xXXXX)
#        Column #3 is the Unicode name (follows a comment sign, '#')
#
#    The entries are in cp737_DOSGreek order
#
##################

0x20-0x7f       idem
#
0x80	U+0391	#GREEK CAPITAL LETTER ALPHA
0x81	U+0392	#GREEK CAPITAL LETTER BETA
0x82	U+0393	#GREEK CAPITAL LETTER GAMMA
0x83	U+0394	#GREEK CAPITAL LETTER DELTA
0x84	U+0395	#GREEK CAPITAL LETTER EPSILON
0x85	U+0396	#GREEK CAPITAL LETTER ZETA
0x86	U+0397	#GREEK CAPITAL LETTER ETA
0x87	U+0398	#GREEK CAPITAL LETTER THETA
0x88	U+0399	#GREEK CAPITAL LETTER IOTA
0x89	U+039a	#GREEK CAPITAL LETTER KAPPA
0x8a	U+039b	#GREEK CAPITAL LETTER LAMDA
0x8b	U+039c	#GREEK CAPITAL LETTER MU
0x8c	U+039d	#GREEK CAPITAL LETTER NU
0x8d	U+039e	#GREEK CAPITAL LETTER XI
0x8e	U+039f	#GREEK CAPITAL LETTER OMICRON
0x8f	U+03a0	#GREEK CAPITAL LETTER PI
0x90	U+03a1	#GREEK CAPITAL LETTER RHO
0x91	U+03a3	#GREEK CAPITAL LETTER SIGMA
0x92	U+03a4	#GREEK CAPITAL LETTER TAU
0x93	U+03a5	#GREEK CAPITAL LETTER UPSILON
0x94	U+03a6	#GREEK CAPITAL LETTER PHI
0x95	U+03a7	#GREEK CAPITAL LETTER CHI
0x96	U+03a8	#GREEK CAPITAL LETTER PSI
0x97	U+03a9	#GREEK CAPITAL LETTER OMEGA
0x98	U+03b1	#GREEK SMALL LETTER ALPHA
0x99	U+03b2	#GREEK SMALL LETTER BETA
0x9a	U+03b3	#GREEK SMALL LETTER GAMMA
0x9b	U+03b4	#GREEK SMALL LETTER DELTA
0x9c	U+03b5	#GREEK SMALL LETTER EPSILON
0x9d	U+03b6	#GREEK SMALL LETTER ZETA
0x9e	U+03b7	#GREEK SMALL LETTER ETA
0x9f	U+03b8	#GREEK SMALL LETTER THETA
0xa0	U+03b9	#GREEK SMALL LETTER IOTA
0xa1	U+03ba	#GREEK SMALL LETTER KAPPA
0xa2	U+03bb	#GREEK SMALL LETTER LAMDA
0xa3	U+03bc	#GREEK SMALL LETTER MU
0xa4	U+03bd	#GREEK SMALL LETTER NU
0xa5	U+03be	#GREEK SMALL LETTER XI
0xa6	U+03bf	#GREEK SMALL LETTER OMICRON
0xa7	U+03c0	#GREEK SMALL LETTER PI
0xa8	U+03c1	#GREEK SMALL LETTER RHO
0xa9	U+03c3	#GREEK SMALL LETTER SIGMA
0xaa	U+03c2	#GREEK SMALL LETTER FINAL SIGMA
0xab	U+03c4	#GREEK SMALL LETTER TAU
0xac	U+03c5	#GREEK SMALL LETTER UPSILON
0xad	U+03c6	#GREEK SMALL LETTER PHI
0xae	U+03c7	#GREEK SMALL LETTER CHI
0xaf	U+03c8	#GREEK SMALL LETTER PSI
0xb0	U+2591	#LIGHT SHADE
0xb1	U+2592	#MEDIUM SHADE
0xb2	U+2593	#DARK SHADE
0xb3	U+2502	#BOX DRAWINGS LIGHT VERTICAL
0xb4	U+2524	#BOX DRAWINGS LIGHT VERTICAL AND LEFT
0xb5	U+2561	#BOX DRAWINGS VERTICAL SINGLE AND LEFT DOUBLE
0xb6	U+2562	#BOX DRAWINGS VERTICAL DOUBLE AND LEFT SINGLE
0xb7	U+2556	#BOX DRAWINGS DOWN DOUBLE AND LEFT SINGLE
0xb8	U+2555	#BOX DRAWINGS DOWN SINGLE AND LEFT DOUBLE
0xb9	U+2563	#BOX DRAWINGS DOUBLE VERTICAL AND LEFT
0xba	U+2551	#BOX DRAWINGS DOUBLE VERTICAL
0xbb	U+2557	#BOX DRAWINGS DOUBLE DOWN AND LEFT
0xbc	U+255d	#BOX DRAWINGS DOUBLE UP AND LEFT
0xbd	U+255c	#BOX DRAWINGS UP DOUBLE AND LEFT SINGLE
0xbe	U+255b	#BOX DRAWINGS UP SINGLE AND LEFT DOUBLE
0xbf	U+2510	#BOX DRAWINGS LIGHT DOWN AND LEFT
0xc0	U+2514	#BOX DRAWINGS LIGHT UP AND RIGHT
0xc1	U+2534	#BOX DRAWINGS LIGHT UP AND HORIZONTAL
0xc2	U+252c	#BOX DRAWINGS LIGHT DOWN AND HORIZONTAL
0xc3	U+251c	#BOX DRAWINGS LIGHT VERTICAL AND RIGHT
0xc4	U+2500	#BOX DRAWINGS LIGHT HORIZONTAL
0xc5	U+253c	#BOX DRAWINGS LIGHT VERTICAL AND HORIZONTAL
0xc6	U+255e	#BOX DRAWINGS VERTICAL SINGLE AND RIGHT DOUBLE
0xc7	U+255f	#BOX DRAWINGS VERTICAL DOUBLE AND RIGHT SINGLE
0xc8	U+255a	#BOX DRAWINGS DOUBLE UP AND RIGHT
0xc9	U+2554	#BOX DRAWINGS DOUBLE DOWN AND RIGHT
0xca	U+2569	#BOX DRAWINGS DOUBLE UP AND HORIZONTAL
0xcb	U+2566	#BOX DRAWINGS DOUBLE DOWN AND HORIZONTAL
0xcc	U+2560	#BOX DRAWINGS DOUBLE VERTICAL AND RIGHT
0xcd	U+2550	#BOX DRAWINGS DOUBLE HORIZONTAL
0xce	U+256c	#BOX DRAWINGS DOUBLE VERTICAL AND HORIZONTAL
0xcf	U+2567	#BOX DRAWINGS UP SINGLE AND HORIZONTAL DOUBLE
0xd0	U+2568	#BOX DRAWINGS UP DOUBLE AND HORIZONTAL SINGLE
0xd1	U+2564	#BOX DRAWINGS DOWN SINGLE AND HORIZONTAL DOUBLE
0xd2	U+2565	#BOX DRAWINGS DOWN DOUBLE AND HORIZONTAL SINGLE
0xd3	U+2559	#BOX DRAWINGS UP DOUBLE AND RIGHT SINGLE
0xd4	U+2558	#BOX DRAWINGS UP SINGLE AND RIGHT DOUBLE
0xd5	U+2552	#BOX DRAWINGS DOWN SINGLE AND RIGHT DOUBLE
0xd6	U+2553	#BOX DRAWINGS DOWN DOUBLE AND RIGHT SINGLE
0xd7	U+256b	#BOX DRAWINGS VERTICAL DOUBLE AND HORIZONTAL SINGLE
0xd8	U+256a	#BOX DRAWINGS VERTICAL SINGLE AND HORIZONTAL DOUBLE
0xd9	U+2518	#BOX DRAWINGS LIGHT UP AND LEFT
0xda	U+250c	#BOX DRAWINGS LIGHT DOWN AND RIGHT
0xdb	U+2588	#FULL BLOCK
0xdc	U+2584	#LOWER HALF BLOCK
0xdd	U+258c	#LEFT HALF BLOCK
0xde	U+2590	#RIGHT HALF BLOCK
0xdf	U+2580	#UPPER HALF BLOCK
0xe0	U+03c9	#GREEK SMALL LETTER OMEGA
0xe1	U+03ac	#GREEK SMALL LETTER ALPHA WITH TONOS
0xe2	U+03ad	#GREEK SMALL LETTER EPSILON WITH TONOS
0xe3	U+03ae	#GREEK SMALL LETTER ETA WITH TONOS
0xe4	U+03ca	#GREEK SMALL LETTER IOTA WITH DIALYTIKA
0xe5	U+03af	#GREEK SMALL LETTER IOTA WITH TONOS
0xe6	U+03cc	#GREEK SMALL LETTER OMICRON WITH TONOS
0xe7	U+03cd	#GREEK SMALL LETTER UPSILON WITH TONOS
0xe8	U+03cb	#GREEK SMALL LETTER UPSILON WITH DIALYTIKA
0xe9	U+03ce	#GREEK SMALL LETTER OMEGA WITH TONOS
0xea	U+0386	#GREEK CAPITAL LETTER ALPHA WITH TONOS
0xeb	U+0388	#GREEK CAPITAL LETTER EPSILON WITH TONOS
0xec	U+0389	#GREEK CAPITAL LETTER ETA WITH TONOS
0xed	U+038a	#GREEK CAPITAL LETTER IOTA WITH TONOS
0xee	U+038c	#GREEK CAPITAL LETTER OMICRON WITH TONOS
0xef	U+038e	#GREEK CAPITAL LETTER UPSILON WITH TONOS
0xf0	U+038f	#GREEK CAPITAL LETTER OMEGA WITH TONOS
0xf1	U+00b1	#PLUS-MINUS SIGN
0xf2	U+2265	#GREATER-THAN OR EQUAL TO
0xf3	U+2264	#LESS-THAN OR EQUAL TO
0xf4	U+03aa	#GREEK CAPITAL LETTER IOTA WITH DIALYTIKA
0xf5	U+03ab	#GREEK CAPITAL LETTER UPSILON WITH DIALYTIKA
0xf6	U+00f7	#DIVISION SIGN
0xf7	U+2248	#ALMOST EQUAL TO
0xf8	U+00b0	#DEGREE SIGN
0xf9	U+2219	#BULLET OPERATOR
0xfa	U+00b7	#MIDDLE DOT
0xfb	U+221a	#SQUARE ROOT
0xfc	U+207f	#SUPERSCRIPT LATIN SMALL LETTER N
0xfd	U+00b2	#SUPERSCRIPT TWO
0xfe	U+25a0	#BLACK SQUARE
0xff	U+00a0	#NO-BREAK SPACE

# TRADE MARK SIGN:
U+2122:(TM)
1'>511 512 513 514 515 516 517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566 567 568 569 570
#
#
#      Pas2nim - Pascal to Nimrod source converter
#        (c) Copyright 2012 Andreas Rumpf
#
#    See the file "copying.txt", included in this
#    distribution, for details about the copyright.
#

# This module implements a FreePascal scanner. It is an adaptation of
# the scanner module.

import
  hashes, options, msgs, strutils, platform, idents, lexbase, llstream

# Exported lexer constants. The character sets below classify single bytes;
# every byte in '\x80'..'\xFF' is a member of SymChars, SymStartChars and
# OpChars alike.
const
  MaxLineLength* = 80         # lines longer than this lead to a warning
  numChars*: TCharSet = {'0'..'9', 'a'..'z', 'A'..'Z'}   # digits and ASCII letters
  SymChars*: TCharSet = {'a'..'z', 'A'..'Z', '0'..'9', '\x80'..'\xFF'}
  SymStartChars*: TCharSet = {'a'..'z', 'A'..'Z', '\x80'..'\xFF'} # SymChars without digits
  OpChars*: TCharSet = {'+', '-', '*', '/', '<', '>', '!', '?', '^', '.', '|',
    '=', ':', '%', '&', '$', '@', '~', '\x80'..'\xFF'}

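# The keywords are sorted alphabetically; the enum values pxAnd..pxXor below
# and the `Keywords` table must stay in the same order.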

# Token kinds produced by the lexer. The values pxAnd..pxXor are the keyword
# kinds; TokKindToStr and setKeyword map between them and the `Keywords` table
# by position, so their order must match that table.
type
  TTokKind* = enum
    pxInvalid, pxEof,
    pxAnd, pxArray, pxAs, pxAsm, pxBegin, pxCase, pxClass, pxConst,
    pxConstructor, pxDestructor, pxDiv, pxDo, pxDownto, pxElse, pxEnd, pxExcept,
    pxExports, pxFinalization, pxFinally, pxFor, pxFunction, pxGoto, pxIf,
    pxImplementation, pxIn, pxInherited, pxInitialization, pxInline,
    pxInterface, pxIs, pxLabel, pxLibrary, pxMod, pxNil, pxNot, pxObject, pxOf,
    pxOr, pxOut, pxPacked, pxProcedure, pxProgram, pxProperty, pxRaise,
    pxRecord, pxRepeat, pxResourcestring, pxSet, pxShl, pxShr, pxThen,
    pxThreadvar, pxTo, pxTry, pxType, pxUnit, pxUntil, pxUses, pxVar, pxWhile,
    pxWith, pxXor,
    pxComment,                # ordinary comment
    pxCommand,                # produced for "{@"
    pxAmp,                    # produced for "{&"
    pxPer,                    # produced for "{%"
    pxStrLit, pxSymbol,       # string literal; identifier that is no keyword
    pxIntLit, pxInt64Lit, # long constant like 0x70fffffff or out of int range
    pxFloatLit, pxParLe, pxParRi, pxBracketLe, pxBracketRi, pxComma,
    pxSemiColon, pxColon,     # punctuation and operators follow
    pxAsgn, pxEquals, pxDot, pxDotDot, pxHat, pxPlus, pxMinus, pxStar, pxSlash,
    pxLe, pxLt, pxGe, pxGt, pxNeq, pxAt, pxStarDirLe, pxStarDirRi, pxCurlyDirLe,
    pxCurlyDirRi
  TTokKinds* = set[TTokKind]

# Spelling of every keyword, in the same order as the enum values
# pxAnd..pxXor. setKeyword runs binaryStrSearch over this table, so it must
# stay sorted.
const
  Keywords = ["and", "array", "as", "asm", "begin", "case", "class", "const",
    "constructor", "destructor", "div", "do", "downto", "else", "end", "except",
    "exports", "finalization", "finally", "for", "function", "goto", "if",
    "implementation", "in", "inherited", "initialization", "inline",
    "interface", "is", "label", "library", "mod", "nil", "not", "object", "of",
    "or", "out", "packed", "procedure", "program", "property", "raise",
    "record", "repeat", "resourcestring", "set", "shl", "shr", "then",
    "threadvar", "to", "try", "type", "unit", "until", "uses", "var", "while",
    "with", "xor"]

  firstKeyword = pxAnd        # token kind that corresponds to Keywords[0]
  lastKeyword = pxXor         # token kind that corresponds to the last entry

# TToken is the record filled by getTok; TLexer extends the generic buffer
# lexer with the name of the file being scanned.
type
  TNumericalBase* = enum base10, base2, base8, base16
  TToken* = object
    xkind*: TTokKind          # the type of the token
    ident*: PIdent            # the parsed identifier
    iNumber*: BiggestInt      # the parsed integer literal
    fNumber*: BiggestFloat    # the parsed floating point literal
    base*: TNumericalBase     # the numerical base; only valid for int
                              # or float literals
    literal*: string          # the parsed (string) literal

  TLexer* = object of TBaseLexer
    filename*: string         # stored by openLexer; used to build line infos


# Forward declarations of the exported procs that are implemented further
# down in this file.
proc getTok*(L: var TLexer, tok: var TToken)
proc PrintTok*(tok: TToken)
proc `$`*(tok: TToken): string
# implementation

var
  dummyIdent: PIdent          # placeholder that fillToken stores in tok.ident;
                              # no assignment to it is visible in this file
  gLinesCompiled: int         # closeLexer adds each lexer's line number to it

proc fillToken(L: var TToken) =
  # Puts every field of the token `L` back to its neutral value so that no
  # data of the previously scanned token survives.
  L.ident = dummyIdent        # always leave a defined identifier behind
  L.base = base10
  L.fNumber = 0.0
  L.iNumber = 0
  L.literal = ""
  L.xkind = pxInvalid

proc openLexer*(lex: var TLexer, filename: string, inputstream: PLLStream) =
  # Attaches `lex` to `inputstream` via the base lexer and then records
  # `filename` in the lexer.
  lex.openBaseLexer(inputstream)
  lex.filename = filename

proc closeLexer*(lex: var TLexer) =
  # Adds the lexer's current line number to the module-wide line counter and
  # then closes the underlying base lexer.
  gLinesCompiled = gLinesCompiled + lex.LineNumber
  lex.closeBaseLexer()

proc getColumn(L: TLexer): int =
  # Column number of the current buffer position, as computed by
  # getColNumber.
  return L.getColNumber(L.bufPos)

proc getLineInfo*(L: TLexer): TLineInfo =
  # Builds a line info from the lexer's filename, its current line number
  # and the column of the current buffer position.
  var column = getColNumber(L, L.bufpos)
  result = newLineInfo(L.filename, L.linenumber, column)

proc lexMessage*(L: TLexer, msg: TMsgKind, arg = "") =
  # Reports `msg` (with the optional argument `arg`) through
  # msgs.GlobalError, located at the lexer's current position.
  var here = getLineInfo(L)
  msgs.GlobalError(here, msg, arg)

proc lexMessagePos(L: var TLexer, msg: TMsgKind, pos: int, arg = "") =
  # Like lexMessage, but the column is derived from the buffer position
  # `pos` (relative to L.lineStart) instead of the current position.
  msgs.GlobalError(newLineInfo(L.filename, L.linenumber, pos - L.lineStart),
                   msg, arg)

proc TokKindToStr*(k: TTokKind): string =
  # Returns a human readable description of the token kind `k`: the keyword
  # spelling for keyword kinds, the punctuation itself for operators and a
  # short description for literals, identifiers, comments and invalid tokens.
  #
  # pxInvalid and pxComment used to be described as "string literal", which
  # produced misleading diagnostics; each kind now has its own description.
  case k
  of pxInvalid: result = "invalid token"
  of pxEof: result = "[EOF]"
  of firstKeyword..lastKeyword:
    # the keyword kinds are declared in the same order as the Keywords table
    result = keywords[ord(k)-ord(firstKeyword)]
  of pxComment: result = "comment"
  of pxStrLit: result = "string literal"
  of pxCommand: result = "{@"
  of pxAmp: result = "{&"
  of pxPer: result = "{%"
  of pxSymbol: result = "identifier"
  of pxIntLit, pxInt64Lit: result = "integer literal"
  of pxFloatLit: result = "floating point literal"
  of pxParLe: result = "("
  of pxParRi: result = ")"
  of pxBracketLe: result = "["
  of pxBracketRi: result = "]"
  of pxComma: result = ","
  of pxSemiColon: result = ";"
  of pxColon: result = ":"
  of pxAsgn: result = ":="
  of pxEquals: result = "="
  of pxDot: result = "."
  of pxDotDot: result = ".."
  of pxHat: result = "^"
  of pxPlus: result = "+"
  of pxMinus: result = "-"
  of pxStar: result = "*"
  of pxSlash: result = "/"
  of pxLe: result = "<="
  of pxLt: result = "<"
  of pxGe: result = ">="
  of pxGt: result = ">"
  of pxNeq: result = "<>"
  of pxAt: result = "@"
  of pxStarDirLe: result = "(*$"
  of pxStarDirRi: result = "*)"
  of pxCurlyDirLe: result = "{$"
  of pxCurlyDirRi: result = "}"

proc `$`(tok: TToken): string =
  # Textual form of a token: the stored literal for invalid tokens, comments
  # and strings; the identifier text for symbols; the number for numeric
  # literals; otherwise the description given by TokKindToStr.
  var kind = tok.xkind
  if kind in {pxInvalid, pxComment, pxStrLit}:
    result = tok.literal
  elif kind == pxSymbol:
    result = tok.ident.s
  elif kind in {pxIntLit, pxInt64Lit}:
    result = $tok.iNumber
  elif kind == pxFloatLit:
    result = $tok.fNumber
  else:
    result = TokKindToStr(kind)

proc PrintTok(tok: TToken) =
  # Writes the textual form of `tok` followed by a newline to stdout.
  stdout.writeln($tok)

proc setKeyword(L: var TLexer, tok: var TToken) =
  # Classifies the identifier stored in `tok`: if its lower-cased text is
  # found in the keyword table the matching keyword kind is set, otherwise
  # the token is a plain symbol.
  var idx = binaryStrSearch(keywords, toLower(tok.ident.s))
  if idx >= 0:
    tok.xKind = TTokKind(ord(firstKeyword) + idx)
  else:
    tok.xkind = pxSymbol

proc matchUnderscoreChars(L: var TLexer, tok: var TToken, chars: TCharSet) =
  # Appends to tok.literal a run of characters from `chars`, where each such
  # character may be followed by a single '_' (which is appended too), and
  # advances L.bufPos past the run.
  var cursor = L.bufpos
  var data = L.buf
  while data[cursor] in chars:
    add(tok.literal, data[cursor])
    inc(cursor)
    if data[cursor] == '_':
      add(tok.literal, '_')
      inc(cursor)
  L.bufPos = cursor

proc isFloatLiteral(s: string): bool =
  # True if `s` contains a '.', an 'e' or an 'E' anywhere; false otherwise
  # (including for the empty string).
  result = false
  for c in items(s):
    if c == '.' or c == 'e' or c == 'E':
      result = true
      break

proc getNumber2(L: var TLexer, tok: var TToken) =
  # Scans a binary literal introduced by '%'. If the '%' is not followed by
  # a binary digit, a pxInvalid token with the literal "%" is produced and
  # only the '%' is consumed.
  var cursor = L.bufpos + 1   # position right after the '%'
  if not (L.buf[cursor] in {'0', '1'}):
    # a lone '%' as in %date% is not a number
    tok.xkind = pxInvalid
    add(tok.literal, '%')
    inc(L.bufpos)
    return
  tok.base = base2
  var value: biggestInt = 0
  var bits = 0
  while true:
    var c = L.buf[cursor]
    if c == '0' or c == '1':
      value = (value shl 1) or (ord(c) - ord('0'))
      inc(bits)
    elif c in {'A'..'Z', 'a'..'z', '2'..'9', '.'}:
      lexMessage(L, errInvalidNumber)
    elif c != '_':
      break                   # first character that cannot belong to the number
    inc(cursor)
  tok.iNumber = value
  # more than 32 binary digits are reported as a 64 bit literal
  if bits <= 32: tok.xkind = pxIntLit
  else: tok.xkind = pxInt64Lit
  L.bufpos = cursor

proc getNumber16(L: var TLexer, tok: var TToken) =
  # Scans a hexadecimal literal introduced by '$'. Underscores between the
  # digits are skipped; other letters or a '.' inside the number are reported
  # as errInvalidNumber.
  var cursor = L.bufpos + 1   # position right after the '$'
  tok.base = base16
  var value: biggestInt = 0
  var bits = 0
  while true:
    var c = L.buf[cursor]
    var digit = -1            # stays negative if `c` is not a hex digit
    if c in {'0'..'9'}:
      digit = ord(c) - ord('0')
    elif c in {'a'..'f'}:
      digit = ord(c) - ord('a') + 10
    elif c in {'A'..'F'}:
      digit = ord(c) - ord('A') + 10
    elif c in {'G'..'Z', 'g'..'z', '.'}:
      lexMessage(L, errInvalidNumber)
    elif c != '_':
      break
    if digit >= 0:
      value = (value shl 4) or digit
      inc(bits, 4)
    inc(cursor)
  tok.iNumber = value
  # more than 32 bits worth of digits are reported as a 64 bit literal
  if bits <= 32: tok.xkind = pxIntLit
  else: tok.xkind = pxInt64Lit
  L.bufpos = cursor

proc getNumber10(L: var TLexer, tok: var TToken) =
  # Scans a decimal integer or floating point literal at L.bufpos.
  #
  # Accepted shape: digits ['.' digits] [('e'|'E') ['+'|'-'] digits], where
  # every digit may be followed by a '_' (see matchUnderscoreChars).
  # The token kind becomes pxFloatLit if the literal has a fraction or an
  # exponent, otherwise pxIntLit or, outside the int32 range, pxInt64Lit.
  # Conversion failures are reported as errInvalidNumber or
  # errNumberOutOfRange.
  #
  # Signs and exponent markers are only consumed as part of a well-formed
  # exponent. Previously any '+', '-', 'e' or 'E' after the fraction was
  # swallowed, so input like `3.5+1` was scanned as one (invalid) literal.
  tok.base = base10
  matchUnderscoreChars(L, tok, {'0'..'9'})
  # a fraction requires a digit after the '.', so `1..5` stays a range
  if (L.buf[L.bufpos] == '.') and (L.buf[L.bufpos + 1] in {'0'..'9'}):
    add(tok.literal, '.')
    inc(L.bufpos)
    matchUnderscoreChars(L, tok, {'0'..'9'})
  if L.buf[L.bufpos] in {'e', 'E'}:
    # look ahead: the exponent only counts if at least one digit follows
    var expDigits = L.bufpos + 1
    if L.buf[expDigits] in {'+', '-'}: inc(expDigits)
    if L.buf[expDigits] in {'0'..'9'}:
      while L.bufpos < expDigits:   # copy the marker and the optional sign
        add(tok.literal, L.buf[L.bufpos])
        inc(L.bufpos)
      matchUnderscoreChars(L, tok, {'0'..'9'})
  try:
    if isFloatLiteral(tok.literal):
      tok.fnumber = parseFloat(tok.literal)
      tok.xkind = pxFloatLit
    else:
      tok.iNumber = ParseInt(tok.literal)
      if (tok.iNumber < low(int32)) or (tok.iNumber > high(int32)):
        tok.xkind = pxInt64Lit
      else:
        tok.xkind = pxIntLit
  except EInvalidValue:
    lexMessage(L, errInvalidNumber, tok.literal)
  except EOverflow:
    lexMessage(L, errNumberOutOfRange, tok.literal)

proc HandleCRLF(L: var TLexer, pos: int): int =
  # If the character at `pos` is CR or LF, delegates to the matching lexbase
  # handler and returns the position it yields; otherwise returns `pos`
  # unchanged.
  result = pos
  if L.buf[pos] == CR:
    result = lexbase.HandleCR(L, pos)
  elif L.buf[pos] == LF:
    result = lexbase.HandleLF(L, pos)

proc getString(L: var TLexer, tok: var TToken) =
  # Scans a string literal that is made of any number of adjacent parts:
  # quoted sections ('...') and character constants (#ddd or #$hh). The
  # decoded text is appended to tok.literal and the token kind becomes
  # pxStrLit.
  var xi: int
  var pos = L.bufPos
  var buf = L.buf
  while true:
    if buf[pos] == '\'':
      inc(pos)                # skip the opening quote
      while true:
        case buf[pos]
        of CR, LF, lexbase.EndOfFile:
          # the quoted section must be closed on the same line
          lexMessage(L, errClosingQuoteExpected)
          break
        of '\'':
          inc(pos)
          if buf[pos] == '\'':
            # two quotes in a row stand for one quote character
            inc(pos)
            add(tok.literal, '\'')
          else:
            break             # closing quote of this section
        else:
          add(tok.literal, buf[pos])
          inc(pos)
    elif buf[pos] == '#':
      inc(pos)                # skip the '#'
      xi = 0
      case buf[pos]
      of '$':
        # hexadecimal character code
        inc(pos)
        xi = 0
        while true:
          case buf[pos]
          of '0'..'9': xi = (xi shl 4) or (ord(buf[pos]) - ord('0'))
          of 'a'..'f': xi = (xi shl 4) or (ord(buf[pos]) - ord('a') + 10)
          of 'A'..'F': xi = (xi shl 4) or (ord(buf[pos]) - ord('A') + 10)
          else: break
          inc(pos)
      of '0'..'9':
        # decimal character code
        xi = 0
        while buf[pos] in {'0'..'9'}:
          xi = (xi * 10) + (ord(buf[pos]) - ord('0'))
          inc(pos)
      else: lexMessage(L, errInvalidCharacterConstant)
      # only codes up to 255 are appended as a character
      if (xi <= 255): add(tok.literal, Chr(xi))
      else: lexMessage(L, errInvalidCharacterConstant)
    else:
      break                   # neither a quote nor a '#': the literal ends here
  tok.xkind = pxStrLit
  L.bufpos = pos

proc getSymbol(L: var TLexer, tok: var TToken) =
  # Scans an identifier at L.bufpos, computing its hash on the fly: upper
  # case ASCII letters are hashed as lower case and underscores do not
  # contribute to the hash. The identifier is then looked up with getIdent
  # and classified by setKeyword.
  var hashVal: THash = 0
  var cursor = L.bufpos
  var data = L.buf
  while true:
    var c = data[cursor]
    if c in {'A'..'Z'}:
      c = chr(ord(c) + (ord('a') - ord('A'))) # toLower()
    if c in {'a'..'z', '0'..'9', '\x80'..'\xFF'}:
      hashVal = hashVal +% Ord(c)
      hashVal = hashVal +% hashVal shl 10
      hashVal = hashVal xor (hashVal shr 6)
    elif c != '_':
      break                   # first character that cannot be part of a symbol
    Inc(cursor)
  # final mixing steps of the hash
  hashVal = hashVal +% hashVal shl 3
  hashVal = hashVal xor (hashVal shr 11)
  hashVal = hashVal +% hashVal shl 15
  tok.ident = getIdent(addr(L.buf[L.bufpos]), cursor - L.bufpos, hashVal)
  L.bufpos = cursor
  setKeyword(L, tok)

proc scanLineComment(L: var TLexer, tok: var TToken) =
  # Scans a `//` comment. Directly following lines that consist of exactly
  # the same number of leading spaces as the first comment's column followed
  # by `//` are merged into the same token. Each comment line is stored in
  # tok.literal with a leading '#', lines being separated by "\n".
  var cursor = L.bufpos
  var data = L.buf
  var startCol = getColNumber(L, cursor)
  tok.xkind = pxComment
  var more = true
  while more:
    inc(cursor, 2)            # step over the two slashes
    add(tok.literal, '#')
    while (data[cursor] != CR) and (data[cursor] != LF) and
        (data[cursor] != lexbase.EndOfFile):
      add(tok.literal, data[cursor])
      inc(cursor)
    cursor = handleCRLF(L, cursor)
    data = L.buf
    var leading = 0           # number of spaces in front of the next line
    while data[cursor] == ' ':
      inc(cursor)
      inc(leading)
    more = (startCol == leading) and (data[cursor] == '/') and
        (data[cursor + 1] == '/')
    if more: add(tok.literal, "\n")
  L.bufpos = cursor

proc scanCurlyComment(L: var TLexer, tok: var TToken) =
  # Scans the body of a `{ ... }` comment starting at L.bufpos, up to and
  # including the closing '}'. The text is stored in tok.literal with a
  # leading '#'; every line break is stored as "\n#". The token kind becomes
  # pxComment.
  #
  # A missing '}' is reported as errTokenExpected. The loop is left
  # explicitly in that case so that scanning terminates even if lexMessage
  # returns; the position is then left at the end-of-file marker.
  var pos = L.bufpos
  var buf = L.buf
  tok.literal = "#"
  tok.xkind = pxComment
  while true:
    case buf[pos]
    of CR, LF:
      pos = HandleCRLF(L, pos)
      buf = L.buf
      add(tok.literal, "\n#")
    of '}':
      inc(pos)
      break
    of lexbase.EndOfFile:
      lexMessage(L, errTokenExpected, "}")
      break                   # never spin on the end-of-file marker
    else:
      add(tok.literal, buf[pos])
      inc(pos)
  L.bufpos = pos

proc scanStarComment(L: var TLexer, tok: var TToken) =
  # Scans the body of a `(* ... *)` comment starting at L.bufpos, up to and
  # including the closing "*)". The text is stored in tok.literal with a
  # leading '#'; every line break is stored as "\n#". The token kind becomes
  # pxComment.
  #
  # A missing "*)" is reported as errTokenExpected. The loop is left
  # explicitly in that case so that scanning terminates even if lexMessage
  # returns; the position is then left at the end-of-file marker.
  var pos = L.bufpos
  var buf = L.buf
  tok.literal = "#"
  tok.xkind = pxComment
  while true:
    case buf[pos]
    of CR, LF:
      pos = HandleCRLF(L, pos)
      buf = L.buf
      add(tok.literal, "\n#")
    of '*':
      inc(pos)
      if buf[pos] == ')':
        inc(pos)
        break
      else:
        add(tok.literal, '*') # a '*' that does not close the comment
    of lexbase.EndOfFile:
      lexMessage(L, errTokenExpected, "*)")
      break                   # never spin on the end-of-file marker
    else:
      add(tok.literal, buf[pos])
      inc(pos)
  L.bufpos = pos

proc skip(L: var TLexer, tok: var TToken) =
  # Advances L.bufpos past spaces, tabs and line breaks. `tok` is not
  # touched.
  var cursor = L.bufpos
  var data = L.buf
  while true:
    var c = data[cursor]
    if (c == ' ') or (c == Tabulator):
      Inc(cursor)
    elif (c == CR) or (c == LF):
      # line breaks go through HandleCRLF; reload the buffer afterwards
      cursor = HandleCRLF(L, cursor)
      data = L.buf
    else:
      break                   # anything else, EndOfFile included, stops here
  L.bufpos = cursor

proc getTok(L: var TLexer, tok: var TToken) =
  # Scans the next token from `L` into `tok`. The token is reset first and
  # leading whitespace is skipped; the first significant character then
  # decides which scanner runs. At the end of the input the kind is pxEof
  # and the position is not advanced.
  tok.xkind = pxInvalid
  fillToken(tok)
  skip(L, tok)
  var c = L.buf[L.bufpos]
  if c in SymStartChars:
    getSymbol(L, tok)
  elif c in {'0'..'9'}:
    getNumber10(L, tok)
  else:
    case c
    of ';':
      tok.xkind = pxSemicolon
      Inc(L.bufpos)
    of '/':
      # "//" starts a line comment, a single '/' is the slash operator
      if L.buf[L.bufpos + 1] == '/':
        scanLineComment(L, tok)
      else:
        tok.xkind = pxSlash
        inc(L.bufpos)
    of ',':
      tok.xkind = pxComma
      Inc(L.bufpos)
    of '(':
      Inc(L.bufpos)
      if (L.buf[L.bufPos] == '*'):
        if (L.buf[L.bufPos + 1] == '$'):
          # "(*$": the symbol that follows is scanned into tok.ident and the
          # kind is then overwritten with pxStarDirLe
          Inc(L.bufpos, 2)
          skip(L, tok)
          getSymbol(L, tok)
          tok.xkind = pxStarDirLe
        else:
          # "(*" without '$': an ordinary comment
          inc(L.bufpos)
          scanStarComment(L, tok)
      else:
        tok.xkind = pxParLe
    of '*':
      inc(L.bufpos)
      if L.buf[L.bufpos] == ')':
        # "*)" outside of a comment yields pxStarDirRi
        inc(L.bufpos)
        tok.xkind = pxStarDirRi
      else:
        tok.xkind = pxStar
    of ')':
      tok.xkind = pxParRi
      Inc(L.bufpos)
    of '[':
      Inc(L.bufpos)
      tok.xkind = pxBracketLe
    of ']':
      Inc(L.bufpos)
      tok.xkind = pxBracketRi
    of '.':
      inc(L.bufpos)
      if L.buf[L.bufpos] == '.':
        tok.xkind = pxDotDot
        inc(L.bufpos)
      else:
        tok.xkind = pxDot
    of '{':
      # the character after '{' selects between the special tokens and an
      # ordinary comment
      Inc(L.bufpos)
      case L.buf[L.bufpos]
      of '$':
        # "{$": the symbol that follows is scanned into tok.ident and the
        # kind is then overwritten with pxCurlyDirLe
        Inc(L.bufpos)
        skip(L, tok)
        getSymbol(L, tok)
        tok.xkind = pxCurlyDirLe
      of '&':
        Inc(L.bufpos)
        tok.xkind = pxAmp
      of '%':
        Inc(L.bufpos)
        tok.xkind = pxPer
      of '@':
        Inc(L.bufpos)
        tok.xkind = pxCommand
      else: scanCurlyComment(L, tok)
    of '+':
      tok.xkind = pxPlus
      inc(L.bufpos)
    of '-':
      tok.xkind = pxMinus
      inc(L.bufpos)
    of ':':
      inc(L.bufpos)
      if L.buf[L.bufpos] == '=':
        inc(L.bufpos)
        tok.xkind = pxAsgn
      else:
        tok.xkind = pxColon
    of '<':
      inc(L.bufpos)
      if L.buf[L.bufpos] == '>':
        inc(L.bufpos)
        tok.xkind = pxNeq
      elif L.buf[L.bufpos] == '=':
        inc(L.bufpos)
        tok.xkind = pxLe
      else:
        tok.xkind = pxLt
    of '>':
      inc(L.bufpos)
      if L.buf[L.bufpos] == '=':
        inc(L.bufpos)
        tok.xkind = pxGe
      else:
        tok.xkind = pxGt
    of '=':
      tok.xkind = pxEquals
      inc(L.bufpos)
    of '@':
      tok.xkind = pxAt
      inc(L.bufpos)
    of '^':
      tok.xkind = pxHat
      inc(L.bufpos)
    of '}':
      # a '}' outside of a comment yields pxCurlyDirRi
      tok.xkind = pxCurlyDirRi
      Inc(L.bufpos)
    of '\'', '#':
      getString(L, tok)
    of '$':
      getNumber16(L, tok)
    of '%':
      getNumber2(L, tok)
    of lexbase.EndOfFile:
      tok.xkind = pxEof
    else:
      # any other character: keep it as the literal of an invalid token,
      # report it together with its ordinal value and step over it
      tok.literal = c & ""
      tok.xkind = pxInvalid
      lexMessage(L, errInvalidToken, c & " (\\" & $(ord(c)) & ')')
      Inc(L.bufpos)