18 files changed, 554 insertions, 0 deletions
diff --git a/tests/lexer/mlexerutils.nim b/tests/lexer/mlexerutils.nim
new file mode 100644
index 000000000..eae7a0006
--- /dev/null
+++ b/tests/lexer/mlexerutils.nim
@@ -0,0 +1,9 @@
+import macros
+
+macro lispReprStr*(a: untyped): untyped = a.lispRepr.newLit  # expands to a string literal holding the lisp-style repr of `a`'s AST
+
+macro assertAST*(expected: string, struct: untyped): untyped =  # emits a check that `struct.treeRepr` equals `expected`
+  let ast = newLit(struct.treeRepr)  # rendered once during expansion and never reassigned, hence `let`
+  result = quote do:
+    if `ast` != `expected`:  # NOTE(review): `indent` is not imported in this module; presumably it resolves where the macro expands (callers import strutils) — confirm
+      doAssert false, "\nGot:\n" & `ast`.indent(2) & "\nExpected:\n" & `expected`.indent(2)
\ No newline at end of file
diff --git a/tests/lexer/tcustom_numeric_literals.nim b/tests/lexer/tcustom_numeric_literals.nim
new file mode 100644
index 000000000..35b4803d3
--- /dev/null
+++ b/tests/lexer/tcustom_numeric_literals.nim
@@ -0,0 +1,177 @@
+discard """
+  targets: "c cpp js"
+"""
+
+# Test tkStrNumLit
+
+import std/[macros, strutils]
+import mlexerutils
+
+# AST checks
+
+assertAST dedent """
+  StmtList
+    ProcDef
+      AccQuoted
+        Ident "\'"
+        Ident "wrap"
+      Empty
+      Empty
+      FormalParams
+        Ident "string"
+        IdentDefs
+          Ident "number"
+          Ident "string"
+          Empty
+      Empty
+      Empty
+      StmtList
+        Asgn
+          Ident "result"
+          Infix
+            Ident "&"
+            Infix
+              Ident "&"
+              StrLit "[["
+              Ident "number"
+            StrLit "]]"""":
+  proc `'wrap`(number: string): string =
+    result = "[[" & number & "]]"
+
+assertAST dedent """
+  StmtList
+    DotExpr
+      RStrLit "-38383839292839283928392839283928392839283.928493849385935898243e-50000"
+      Ident "\'wrap"""":
+  -38383839292839283928392839283928392839283.928493849385935898243e-50000'wrap
+
+proc `'wrap`(number: string): string = "[[" & number & "]]"  # routine behind the `'wrap` literal suffix; gets the literal's text as a string
+proc wrap2(number: string): string = "[[" & number & "]]"  # same body under an ordinary name; referenced as `-12.wrap2` further down
+doAssert lispReprStr(-1'wrap) == """(DotExpr (RStrLit "-1") (Ident "\'wrap"))"""
+
+template main =  # test body; expanded under `static:` and again at run time at the end of this file
+  block: # basic suffix usage
+    template `'twrap`(number: string): untyped =
+      number.`'wrap`
+    proc extraContext(): string =
+      22.40'wrap  # suffixed literal used as the implicit return value
+    proc `*`(left, right: string): string =
+      result = left & "times" & right
+    proc `+`(left, right: string): string =
+      result = left & "plus" & right
+
+    doAssert 1'wrap == "[[1]]"
+    doAssert -1'wrap == "[[-1]]":
+      "unable to resolve a negative integer-suffix pattern"
+    doAssert 12345.67890'wrap == "[[12345.67890]]"
+    doAssert 1'wrap*1'wrap == "[[1]]times[[1]]":
+      "unable to resolve an operator between two suffixed numeric literals"
+    doAssert 1'wrap+ -1'wrap == "[[1]]plus[[-1]]":  # will generate a compiler warning about inconsistent spacing
+      "unable to resolve a negative suffixed numeric literal following an operator"
+    doAssert 1'wrap + -1'wrap == "[[1]]plus[[-1]]"
+    doAssert 1'twrap == "[[1]]"
+    doAssert extraContext() == "[[22.40]]":
+      "unable to return a suffixed numeric literal by an implicit return"
+    doAssert 0x5a3a'wrap == "[[0x5a3a]]"
+    doAssert 0o5732'wrap == "[[0o5732]]"
+    doAssert 0b0101111010101'wrap == "[[0b0101111010101]]"
+    doAssert -38383839292839283928392839283928392839283.928493849385935898243e-50000'wrap == "[[-38383839292839283928392839283928392839283.928493849385935898243e-50000]]"
+    doAssert 1234.56'wrap == "[[1234.56]]":
+      "unable to properly account for context with suffixed numeric literals"
+
+  block: # verify that the i64, f32, etc builtin suffixes still parse correctly
+    const expectedF32: float32 = 123.125
+    proc `'f9`(number: string): string =   # proc starts with 'f' just like 'f32'
+      "[[" & number & "]]"
+    proc `'f32a`(number: string): string =   # looks even more like 'f32'
+      "[[" & number & "]]"
+    proc `'d9`(number: string): string =   # proc starts with 'd' just like the d suffix
+      "[[" & number & "]]"
+    proc `'i9`(number: string): string =   # proc starts with 'i' just like 'i64'
+      "[[" & number & "]]"
+    proc `'u9`(number: string): string =   # proc starts with 'u' just like 'u8'
+      "[[" & number & "]]"
+
+    doAssert 123.125f32 == expectedF32:
+      "failing to support non-quoted legacy f32 floating point suffix"
+    doAssert 123.125'f32 == expectedF32
+    doAssert 123.125e0'f32 == expectedF32
+    doAssert 1234.56'wrap == 1234.56'f9
+    doAssert 1234.56'wrap == 1234.56'f32a
+    doAssert 1234.56'wrap == 1234.56'd9
+    doAssert 1234.56'wrap == 1234.56'i9
+    doAssert 1234.56'wrap == 1234.56'u9
+    doAssert lispReprStr(1234.56'u9) == """(DotExpr (RStrLit "1234.56") (Ident "\'u9"))""":
+      "failed to properly build AST for suffix that starts with u"
+    doAssert -128'i8 == (-128).int8
+
+  block: # case checks
+    doAssert 1E2 == 100:
+      "lexer not handling upper-case exponent"
+    doAssert 1.0E2 == 100.0
+    doAssert 1e2 == 100
+    doAssert 0xdeadBEEF'wrap == "[[0xdeadBEEF]]":
+      "lexer not maintaining original case"
+    doAssert 0.1E12'wrap == "[[0.1E12]]"
+    doAssert 0.0e12'wrap == "[[0.0e12]]"
+    doAssert 0.0e+12'wrap == "[[0.0e+12]]"
+    doAssert 0.0e-12'wrap == "[[0.0e-12]]"
+    doAssert 0e-12'wrap == "[[0e-12]]"
+
+  block: # macro and template usage
+    template `'foo`(a: string): untyped = (a, 2)
+    doAssert -12'foo == ("-12", 2)
+    template `'fooplus`(a: string, b: int): untyped = (a, b)  # suffix routine with an extra argument supplied in call parentheses
+    doAssert -12'fooplus(2) == ("-12", 2)
+    template `'fooplusopt`(a: string, b: int = 99): untyped = (a, b)  # extra argument has a default, so the parentheses may be omitted
+    doAssert -12'fooplusopt(2) == ("-12", 2)
+    doAssert -12'fooplusopt() == ("-12", 99)
+    doAssert -12'fooplusopt == ("-12", 99)
+    macro `'bar`(a: static string): untyped = newLit(a.repr)
+    doAssert -12'bar == "\"-12\""
+    macro deb(a): untyped = newLit(a.repr)
+    doAssert deb(-12'bar) == "-12'bar"
+
+  block: # bug 1 from https://github.com/nim-lang/Nim/pull/17020#issuecomment-803193947
+    macro deb1(a): untyped = newLit a.repr
+    macro deb2(a): untyped =
+      a[1] = ident($a[1])  # rebuild child 1 as a fresh ident from its string form before dumping the tree
+      newLit a.lispRepr
+    doAssert deb1(-12'wrap) == "-12'wrap"
+    doAssert deb1(-12'nonexistent) == "-12'nonexistent"
+    doAssert deb2(-12'nonexistent) == """(DotExpr (RStrLit "-12") (Ident "\'nonexistent"))"""
+    doAssert deb2(-12.wrap2) == """(DotExpr (IntLit -12) (Ident "wrap2"))"""
+    doAssert deb2(-12'wrap) == """(DotExpr (RStrLit "-12") (Ident "\'wrap"))"""
+
+  block: # bug 2 from https://github.com/nim-lang/Nim/pull/17020#issuecomment-803193947
+    template toSuf(`'suf`): untyped =  # the suffix routine is the template parameter; the call below passes `'wrap`
+      let x = -12'suf
+      x
+    doAssert toSuf(`'wrap`) == "[[-12]]"
+
+  block: # bug 10 from https://github.com/nim-lang/Nim/pull/17020#issuecomment-803193947
+    proc `myecho`(a: auto): auto = a
+    template fn1(): untyped =
+      let a = "abc"
+      -12'wrap
+    template fn2(): untyped =
+      `myecho` -12'wrap
+    template fn3(): untyped =
+      -12'wrap
+    doAssert fn1() == "[[-12]]"
+    doAssert fn2() == "[[-12]]"
+    doAssert fn3() == "[[-12]]"
+
+    block: # bug 9 from https://github.com/nim-lang/Nim/pull/17020#issuecomment-803193947
+      macro metawrap(): untyped =
+        func wrap1(a: string): string = "{" & a & "}"  # declared inside the macro body; the quoted code below refers to it by name
+        func `'wrap3`(a: string): string = "{" & a & "}"
+        result = quote do:
+          let a1 {.inject.} = wrap1"-128"
+          let a2 {.inject.} = -128'wrap3
+      metawrap()
+      doAssert a1 == "{-128}"
+      doAssert a2 == "{-128}"
+
+static: main()
+main()
diff --git a/tests/lexer/tident.nim b/tests/lexer/tident.nim
new file mode 100644
index 000000000..e5177436d
--- /dev/null
+++ b/tests/lexer/tident.nim
@@ -0,0 +1,34 @@
+discard """
+output: '''
+Length correct
+Correct
+Correct
+Correct
+Correct
+Correct
+Correct
+Correct
+Correct
+'''
+"""
+
+type
+  TIdObj* = object of RootObj
+    id*: int                  # unique id; use this for comparisons and not the pointers
+
+  PIdObj* = ref TIdObj
+  PIdent* = ref TIdent
+  TIdent*{.acyclic.} = object
+    s*: string
+
+proc myNewString(L: int): string {.inline.} =
+  ## Allocates a string of length `L` via `newString` and returns it.
+  ## Echoes "Length correct" (or "bug") for the length check, then one
+  ## "Correct"/"Wrong" line per index depending on whether the char is '\0'.
+  result = newString(L)
+  echo(if result.len == L: "Length correct" else: "bug")
+  # Half-open range: visits exactly the indices 0 .. L-1.
+  for pos in 0 ..< L:
+    echo(if result[pos] == '\0': "Correct" else: "Wrong")
+
+var s = myNewString(8)
diff --git a/tests/lexer/tind1.nim b/tests/lexer/tind1.nim
new file mode 100644
index 000000000..2185c3074
--- /dev/null
+++ b/tests/lexer/tind1.nim
@@ -0,0 +1,25 @@
+discard """
+  errormsg: "invalid indentation"
+  line: 24
+"""
+
+import macros
+
+# finally optional indentation in 'if' expressions :-):
+var x = if 4 != 5:
+    "yes"
+  else:
+    "no"
+
+macro mymacro(n, b): untyped =  # takes two untyped arguments; the call below passes a block plus an `else` branch
+  discard  # body does nothing with `n` or `b`
+
+mymacro:
+  echo "test"
+else:
+  echo "else part"
+
+if 4 == 3:
+  echo "bug"
+  else:
+  echo "no bug"
diff --git a/tests/lexer/tindent1.nim b/tests/lexer/tindent1.nim
new file mode 100644
index 000000000..78a303783
--- /dev/null
+++ b/tests/lexer/tindent1.nim
@@ -0,0 +1,42 @@
+discard """
+  output: '''Success'''
+"""
+
+const romanNumbers1 =
+    [
+    ("M", 1000), ("D", 500), ("C", 100),
+    ("L", 50), ("X", 10), ("V", 5), ("I", 1) ]
+
+const romanNumbers2 =
+    [
+    ("M", 1000), ("D", 500), ("C", 100),
+    ("L", 50), ("X", 10), ("V", 5), ("I", 1)
+    ]
+
+const romanNumbers3 =
+  [
+    ("M", 1000), ("D", 500), ("C", 100),
+    ("L", 50), ("X", 10), ("V", 5), ("I", 1)
+  ]
+
+const romanNumbers4 = [
+    ("M", 1000), ("D", 500), ("C", 100),
+    ("L", 50), ("X", 10), ("V", 5), ("I", 1)
+    ]
+
+
+proc main =  # same loop/if sequence as the top-level statements that follow, but inside a proc body
+  var j = 0
+  while j < 10:
+    inc(j);  # NOTE(review): the trailing `;` looks deliberate for this lexer/indentation test — confirm before removing
+
+  if j == 5: doAssert false  # the loop only exits once j reaches 10, so this must not fire
+
+var j = 0
+while j < 10:
+  inc(j);
+
+if j == 5: doAssert false
+
+main()
+echo "Success"
diff --git a/tests/lexer/tintegerliterals.nim b/tests/lexer/tintegerliterals.nim
new file mode 100644
index 000000000..fd401b71b
--- /dev/null
+++ b/tests/lexer/tintegerliterals.nim
@@ -0,0 +1,9 @@
+# test the valid literals
+doAssert 0b10 == 2
+doAssert 0B10 == 2
+doAssert 0x10 == 16
+doAssert 0X10 == 16
+doAssert 0o10 == 8
+# the following is deprecated:
+doAssert 0c10 == 8
+doAssert 0C10 == 8
diff --git a/tests/lexer/tinvalidintegerliteral1.nim b/tests/lexer/tinvalidintegerliteral1.nim
new file mode 100644
index 000000000..6bf7624f3
--- /dev/null
+++ b/tests/lexer/tinvalidintegerliteral1.nim
@@ -0,0 +1,7 @@
+discard """
+  errormsg: "invalid number"
+  file: "tinvalidintegerliteral1.nim"
+  line: 7
+"""
+
+echo 0b
diff --git a/tests/lexer/tinvalidintegerliteral2.nim b/tests/lexer/tinvalidintegerliteral2.nim
new file mode 100644
index 000000000..eb6efc131
--- /dev/null
+++ b/tests/lexer/tinvalidintegerliteral2.nim
@@ -0,0 +1,7 @@
+discard """
+  errormsg: "invalid number"
+  file: "tinvalidintegerliteral2.nim"
+  line: 7
+"""
+
+echo 0x
diff --git a/tests/lexer/tinvalidintegerliteral3.nim b/tests/lexer/tinvalidintegerliteral3.nim
new file mode 100644
index 000000000..e09cda54a
--- /dev/null
+++ b/tests/lexer/tinvalidintegerliteral3.nim
@@ -0,0 +1,7 @@
+discard """
+  errormsg: "0O5 is an invalid int literal; For octal literals use the '0o' prefix."
+  file: "tinvalidintegerliteral3.nim"
+  line: 7
+"""
+
+echo 0O5
diff --git a/tests/lexer/tlexer.nim b/tests/lexer/tlexer.nim
new file mode 100644
index 000000000..e36220e7a
--- /dev/null
+++ b/tests/lexer/tlexer.nim
@@ -0,0 +1,60 @@
+discard """
+  disabled: true
+"""
+
+# We start with a comment
+# This is the same comment
+
+# This is a new one!
+
+import
+  lexbase, os, strutils
+
+type
+  TMyRec {.final.} = object
+    x, y: int     # coordinates
+    c: char       # a character
+    a: int32      # an integer
+
+  PMyRec = ref TMyRec # a reference to `TMyRec`
+
+proc splitText(txt: string): seq[string] # splits a text into several lines
+                                         # the comment continues here
+                                         # this is not easy to parse!
+
+proc anotherSplit(txt: string): seq[string] =
+  # the comment should belong to `anotherSplit`!
+  # another problem: comments are statements!
+
+const
+  x = 0B0_10001110100_0000101001000111101011101111111011000101001101001001'f64 # x ~~ 1.72826e35
+  myNan = 0B01111111100000101100000000001000'f32 # NAN
+  y = """
+    a rather long text.
+    Over many
+    lines.
+  """
+  s = "\xff"
+  a = {0..234}
+  b = {0..high(int)}
+  v = 0'i32
+  z = 6767566'f32
+
+# small test program for lexbase
+
+proc main*(infile: string, a, b: int, someverylongnamewithtype = 0,
+           anotherlongthingie = 3) =  # NOTE(review): this file is marked `disabled: true`; `L` and `call` are not declared in the visible source
+  var
+    myInt: int = 0
+    s: seq[string]
+  # this should be an error!
+  if initBaseLexer(L, infile, 30): nil
+  else:
+    writeLine(stdout, "could not open: " & infile)
+  writeLine(stdout, "Success!")
+  call(3, # we use 3
+       12, # we use 12
+       43) # we use 43
+
+
+main(ParamStr(1), 9, 0)
diff --git a/tests/lexer/tlexermisc.nim b/tests/lexer/tlexermisc.nim
new file mode 100644
index 000000000..3e3993599
--- /dev/null
+++ b/tests/lexer/tlexermisc.nim
@@ -0,0 +1,27 @@
+discard """
+  action: run
+  output: "equal"
+"""
+
+var t=0x950412DE
+
+if t==0x950412DE:
+    echo "equal"
+else:
+    echo "not equal"
+
+type
+  TArray = array[0x0012..0x0013, int]
+
+var a: TArray
+
+doAssert a[0x0012] == 0
+
+
+# #7884
+
+type Obj = object
+    ö: int
+
+let o = Obj(ö: 1)
+doAssert o.ö == 1
diff --git a/tests/lexer/tlexerspaces.nim b/tests/lexer/tlexerspaces.nim
new file mode 100644
index 000000000..14b16111d
--- /dev/null
+++ b/tests/lexer/tlexerspaces.nim
@@ -0,0 +1,2 @@
+discard 12 +                                                                                                                                                                                                                                                                           5
+discard 12 + 5                                                                                                                        
diff --git a/tests/lexer/tmissingnl.nim b/tests/lexer/tmissingnl.nim
new file mode 100644
index 000000000..dc939bcd2
--- /dev/null
+++ b/tests/lexer/tmissingnl.nim
@@ -0,0 +1,9 @@
+discard """
+  errormsg: "invalid indentation"
+  file: "tmissingnl.nim"
+  line: 7
+"""
+
+import strutils let s: seq[int] = @[0, 1, 2, 3, 4, 5, 6]
+
+#s[1..3] = @[]
diff --git a/tests/lexer/trawstr.nim b/tests/lexer/trawstr.nim
new file mode 100644
index 000000000..aa41071d5
--- /dev/null
+++ b/tests/lexer/trawstr.nim
@@ -0,0 +1,10 @@
+discard """
+  errormsg: "closing \" expected"
+  file: "trawstr.nim"
+  line: 10
+"""
+# Test the new raw strings:
+
+const
+  xxx = r"This is a raw string!"
+  yyy = "This not\" #ERROR
diff --git a/tests/lexer/tstrlits.nim b/tests/lexer/tstrlits.nim
new file mode 100644
index 000000000..8e8250a5b
--- /dev/null
+++ b/tests/lexer/tstrlits.nim
@@ -0,0 +1,19 @@
+discard """
+  output: "a\"\"long string\"\"\"\"\"abc\"def_'2'●𝌆𝌆A"
+"""
+# Test the new different string literals
+
+const
+  tripleEmpty = """"long string"""""""" # value: "long string""""" (one leading quote, five trailing quotes, no space)
+
+  rawQuote = r"a"""  # raw string: the doubled quote stands for a single one, so the value is a"
+
+  raw = r"abc""def"  # value: abc"def
+
+  escaped = "\x5f'\50'\u25cf\u{1D306}\u{1d306}\u{41}"
+
+
+stdout.write(rawQuote)
+stdout.write(tripleEmpty)
+stdout.write(raw)
+stdout.writeLine(escaped)
diff --git a/tests/lexer/tunary_minus.nim b/tests/lexer/tunary_minus.nim
new file mode 100644
index 000000000..5ec2b5c70
--- /dev/null
+++ b/tests/lexer/tunary_minus.nim
@@ -0,0 +1,83 @@
+discard """
+  targets: "c cpp js"
+"""
+
+# Test numeric literals and handling of minus symbol
+
+import std/[macros, strutils]
+import std/private/jsutils
+
+import mlexerutils
+
+const one = 1
+const minusOne = `-`(one)
+
+# border cases that *should* generate compiler errors:
+assertAST dedent """
+  StmtList
+    Asgn
+      Ident "x"
+      Command
+        IntLit 4
+        IntLit -1""":
+  x = 4 -1
+assertAST dedent """
+  StmtList
+    VarSection
+      IdentDefs
+        Ident "x"
+        Ident "uint"
+        IntLit -1""":
+  var x: uint = -1
+template bad() =  # wraps the `x = 4 -1` statement so the next line can check that instantiating it does not compile
+  x = 4 -1
+doAssert not compiles(bad())
+
+template main =  # test body; expanded under `static:` and again at run time at the end of this file
+  block: # check when a minus (-) is a negative sign for a literal
+    doAssert -1 == minusOne:
+      "unable to parse a spaced-prefixed negative int"
+    doAssert lispReprStr(-1) == """(IntLit -1)"""
+    doAssert -1.0'f64 == minusOne.float64
+    doAssert lispReprStr(-1.000'f64) == """(Float64Lit -1.0)"""
+    doAssert lispReprStr( -1.000'f64) == """(Float64Lit -1.0)"""
+    doAssert [-1].contains(minusOne):
+      "unable to handle negatives after square bracket"
+    doAssert lispReprStr([-1]) == """(Bracket (IntLit -1))"""
+    doAssert (-1, 2)[0] == minusOne:
+      "unable to handle negatives after parenthesis"
+    doAssert lispReprStr((-1, 2)) == """(TupleConstr (IntLit -1) (IntLit 2))"""
+    proc x(): int =
+      var a = 1;-1  # the -1 should act as the return value
+    doAssert x() == minusOne:
+      "unable to handle negatives after semi-colon"
+
+  block: # negative literals with base prefixes, sized suffixes and the lowest integer values
+    doAssert -0b111 == -7
+    doAssert -0xff == -255
+    doAssert -128'i8 == (-128).int8
+    doAssert $(-128'i8) == "-128"
+    doAssert -32768'i16 == int16.low
+    doAssert -2147483648'i32 == int32.low
+    when int.sizeof > 4:
+      doAssert -9223372036854775808 == int.low
+    whenJsNoBigInt64: discard  # NOTE(review): from std/private/jsutils; presumably skips the int64 check on JS without BigInt — confirm
+    do:
+      doAssert -9223372036854775808 == int64.low
+
+  block: # check when a minus (-) is a unary op
+    doAssert -one == minusOne:
+      "unable to handle a negative prior to identifier"
+
+  block: # check when a minus (-) is a subtraction op
+    doAssert 4-1 == 3:
+      "unable to handle subtraction sans surrounding spaces with a numeric literal"
+    doAssert 4-one == 3:
+      "unable to handle subtraction sans surrounding spaces with an identifier"
+    doAssert 4 - 1 == 3:
+      "unable to handle subtraction with surrounding spaces with a numeric literal"
+    doAssert 4 - one == 3:
+      "unable to handle subtraction with surrounding spaces with an identifier"
+
+static: main()
+main()
diff --git a/tests/lexer/tunderscores.nim b/tests/lexer/tunderscores.nim
new file mode 100644
index 000000000..1896a2898
--- /dev/null
+++ b/tests/lexer/tunderscores.nim
@@ -0,0 +1,11 @@
+discard """
+  errormsg: "invalid token: trailing underscore"
+  file: "tunderscores.nim"
+  line: 8
+"""
+# Bug #502670
+
+var ef_ = 3  #ERROR_MSG invalid token: trailing underscore
+var a__b = 1
+var c___d = 2
+echo(ab, cd, ef_)
diff --git a/tests/lexer/tunicode_operators.nim b/tests/lexer/tunicode_operators.nim
new file mode 100644
index 000000000..6ad40beab
--- /dev/null
+++ b/tests/lexer/tunicode_operators.nim
@@ -0,0 +1,16 @@
+#{.experimental: "unicodeOperators".}
+
+proc `⊙`(x, y: int): int = x * y  # Unicode operator implemented as plain multiplication
+proc `⊙=`(x: var int, y: int) = x *= y  # in-place counterpart of `⊙`
+
+proc `⊞++`(x, y: int): int = x + y  # operator mixing a Unicode symbol with ASCII `+`; plain addition
+
+const a = 9
+
+var x = 45
+x ⊙= a⊞++4⊙3  # Unicode-operator form of the ASCII expression used for `y`
+
+var y = 45
+y *= 9 + 4 * 3  # reference computation with the built-in operators
+
+doAssert x == y  # doAssert rather than assert, so the check is not compiled out when assertions are disabled