custom integer literals (#17489)

* user defined integer literals; refs #17020 * updated renderer.nim * use mlexerutils helper * imported all test cases from https://github.com/nim-lang/Nim/pull/17020 * final grammar updated
author: Andreas Rumpf <rumpf_a@web.de> 2021-03-24 14:46:19 +0100
committer: GitHub <noreply@github.com> 2021-03-24 14:46:19 +0100
commit: 5f5a92379fab59db0e7e3da5a9cec2a11be45ed8 (patch)
tree: 1bce47645068dd814f992c221d14e56bb0b2c932
parent: 7366a3da37605b230823dd4b6db07abb70dbd40b (diff)
download: Nim-5f5a92379fab59db0e7e3da5a9cec2a11be45ed8.tar.gz
12 files changed, 412 insertions, 221 deletions
diff --git a/changelog.md b/changelog.md
index c0cd97024..922b8470a 100644
--- a/changelog.md
+++ b/changelog.md
@@ -266,6 +266,8 @@
 - The unary minus in `-1` is now part of the integer literal, it is now parsed as a single token.
   This implies that edge cases like `-128'i8` finally work correctly.
 
+- Custom numeric literals are now supported.
+
 
 ## Compiler changes
 
diff --git a/compiler/docgen.nim b/compiler/docgen.nim
index dded231d7..b24a24a2b 100644
--- a/compiler/docgen.nim
+++ b/compiler/docgen.nim
@@ -416,7 +416,7 @@ proc nodeToHighlightedHtml(d: PDoc; n: PNode; result: var Rope; renderFlags: TRe
     of tkOpr:
       dispA(d.conf, result, "<span class=\"Operator\">$1</span>", "\\spanOperator{$1}",
             [escLit])
-    of tkStrLit..tkTripleStrLit:
+    of tkStrLit..tkTripleStrLit, tkCustomLit:
       dispA(d.conf, result, "<span class=\"StringLit\">$1</span>",
             "\\spanStringLit{$1}", [escLit])
     of tkCharLit:
diff --git a/compiler/lexer.nim b/compiler/lexer.nim
index bcd3f0076..b34b010c2 100644
--- a/compiler/lexer.nim
+++ b/compiler/lexer.nim
@@ -60,6 +60,7 @@ type
     tkFloat64Lit = "tkFloat64Lit", tkFloat128Lit = "tkFloat128Lit",
     tkStrLit = "tkStrLit", tkRStrLit = "tkRStrLit", tkTripleStrLit = "tkTripleStrLit",
     tkGStrLit = "tkGStrLit", tkGTripleStrLit = "tkGTripleStrLit", tkCharLit = "tkCharLit",
+    tkCustomLit = "tkCustomLit",
 
     tkParLe = "(", tkParRi = ")", tkBracketLe = "[",
     tkBracketRi = "]", tkCurlyLe = "{", tkCurlyRi = "}",
@@ -313,8 +314,7 @@ proc getNumber(L: var Lexer, result: var Token) =
 
   proc lexMessageLitNum(L: var Lexer, msg: string, startpos: int, msgKind = errGenerated) =
     # Used to get slightly human friendlier err messages.
-    const literalishChars = {'A'..'F', 'a'..'f', '0'..'9', 'X', 'x', 'o', 'O',
-      'c', 'C', 'b', 'B', '_', '.', '\'', 'd', 'i', 'u'}
+    const literalishChars = {'A'..'Z', 'a'..'z', '0'..'9', '_', '.', '\''}
     var msgPos = L.bufpos
     var t: Token
     t.literal = ""
@@ -326,15 +326,14 @@ proc getNumber(L: var Lexer, result: var Token) =
       t.literal.add(L.buf[L.bufpos])
       inc(L.bufpos)
       matchChars(L, t, literalishChars)
-    if L.buf[L.bufpos] in {'\'', 'f', 'F', 'd', 'D', 'i', 'I', 'u', 'U'}:
-      inc(L.bufpos)
+    if L.buf[L.bufpos] in literalishChars:
       t.literal.add(L.buf[L.bufpos])
+      inc(L.bufpos)
       matchChars(L, t, {'0'..'9'})
     L.bufpos = msgPos
     lexMessage(L, msgKind, msg % t.literal)
 
   var
-    startpos, endpos: int
     xi: BiggestInt
     isBase10 = true
     numDigits = 0
@@ -346,7 +345,7 @@ proc getNumber(L: var Lexer, result: var Token) =
   result.tokType = tkIntLit   # int literal until we know better
   result.literal = ""
   result.base = base10
-  startpos = L.bufpos
+  let startpos = L.bufpos
   tokenBegin(result, startpos)
 
   var isPositive = true
@@ -395,201 +394,187 @@ proc getNumber(L: var Lexer, result: var Token) =
       discard matchUnderscoreChars(L, result, {'0'..'9'})
     if L.buf[L.bufpos] in {'e', 'E'}:
       result.tokType = tkFloatLit
-      eatChar(L, result, 'e')
+      eatChar(L, result)
       if L.buf[L.bufpos] in {'+', '-'}:
         eatChar(L, result)
       discard matchUnderscoreChars(L, result, {'0'..'9'})
-  endpos = L.bufpos
+  let endpos = L.bufpos
 
   # Second stage, find out if there's a datatype suffix and handle it
   var postPos = endpos
+
   if L.buf[postPos] in {'\'', 'f', 'F', 'd', 'D', 'i', 'I', 'u', 'U'}:
+    let errPos = postPos
+    var customLitPossible = false
     if L.buf[postPos] == '\'':
       inc(postPos)
+      customLitPossible = true
 
-    case L.buf[postPos]
-    of 'f', 'F':
-      inc(postPos)
-      if (L.buf[postPos] == '3') and (L.buf[postPos + 1] == '2'):
-        result.tokType = tkFloat32Lit
-        inc(postPos, 2)
-      elif (L.buf[postPos] == '6') and (L.buf[postPos + 1] == '4'):
-        result.tokType = tkFloat64Lit
-        inc(postPos, 2)
-      elif (L.buf[postPos] == '1') and
-           (L.buf[postPos + 1] == '2') and
-           (L.buf[postPos + 2] == '8'):
-        result.tokType = tkFloat128Lit
-        inc(postPos, 3)
-      else:   # "f" alone defaults to float32
-        result.tokType = tkFloat32Lit
-    of 'd', 'D':  # ad hoc convenience shortcut for f64
-      inc(postPos)
-      result.tokType = tkFloat64Lit
-    of 'i', 'I':
-      inc(postPos)
-      if (L.buf[postPos] == '6') and (L.buf[postPos + 1] == '4'):
-        result.tokType = tkInt64Lit
-        inc(postPos, 2)
-      elif (L.buf[postPos] == '3') and (L.buf[postPos + 1] == '2'):
-        result.tokType = tkInt32Lit
-        inc(postPos, 2)
-      elif (L.buf[postPos] == '1') and (L.buf[postPos + 1] == '6'):
-        result.tokType = tkInt16Lit
-        inc(postPos, 2)
-      elif (L.buf[postPos] == '8'):
-        result.tokType = tkInt8Lit
-        inc(postPos)
-      else:
-        lexMessageLitNum(L, "invalid number: '$1'", startpos)
-    of 'u', 'U':
-      inc(postPos)
-      if (L.buf[postPos] == '6') and (L.buf[postPos + 1] == '4'):
-        result.tokType = tkUInt64Lit
-        inc(postPos, 2)
-      elif (L.buf[postPos] == '3') and (L.buf[postPos + 1] == '2'):
-        result.tokType = tkUInt32Lit
-        inc(postPos, 2)
-      elif (L.buf[postPos] == '1') and (L.buf[postPos + 1] == '6'):
-        result.tokType = tkUInt16Lit
-        inc(postPos, 2)
-      elif (L.buf[postPos] == '8'):
-        result.tokType = tkUInt8Lit
-        inc(postPos)
+    if L.buf[postPos] in SymChars:
+      var suffixAsLower = newStringOfCap(10)
+      var suffix = newStringOfCap(10)
+      while true:
+        let c = L.buf[postPos]
+        suffix.add c
+        suffixAsLower.add toLowerAscii(c)
+        inc postPos
+        if L.buf[postPos] notin SymChars+{'_'}: break
+      case suffix
+      of "f", "f32": result.tokType = tkFloat32Lit
+      of "d", "f64": result.tokType = tkFloat64Lit
+      of "f128": result.tokType = tkFloat128Lit
+      of "i8": result.tokType = tkInt8Lit
+      of "i16": result.tokType = tkInt16Lit
+      of "i32": result.tokType = tkInt32Lit
+      of "i64": result.tokType = tkInt64Lit
+      of "u": result.tokType = tkUIntLit
+      of "u8": result.tokType = tkUInt8Lit
+      of "u16": result.tokType = tkUInt16Lit
+      of "u32": result.tokType = tkUInt32Lit
+      of "u64": result.tokType = tkUInt64Lit
       else:
-        result.tokType = tkUIntLit
+        if customLitPossible:
+          # remember the position of the ``'`` so that the parser doesn't
+          # have to reparse the custom literal:
+          result.iNumber = len(result.literal)
+          result.literal.add '\''
+          result.literal.add suffix
+          result.tokType = tkCustomLit
+        else:
+          lexMessageLitNum(L, "invalid number suffix: '$1'", errPos)
     else:
-      lexMessageLitNum(L, "invalid number: '$1'", startpos)
+      lexMessageLitNum(L, "invalid number suffix: '$1'", errPos)
 
   # Is there still a literalish char awaiting? Then it's an error!
   if  L.buf[postPos] in literalishChars or
      (L.buf[postPos] == '.' and L.buf[postPos + 1] in {'0'..'9'}):
     lexMessageLitNum(L, "invalid number: '$1'", startpos)
 
-  # Third stage, extract actual number
-  L.bufpos = startpos            # restore position
-  var pos = startpos
-  try:
-    if (L.buf[pos] == '0') and (L.buf[pos + 1] in baseCodeChars):
-      inc(pos, 2)
-      xi = 0                  # it is a base prefix
-
-      case L.buf[pos - 1]
-      of 'b', 'B':
-        result.base = base2
-        while pos < endpos:
-          if L.buf[pos] != '_':
-            xi = `shl`(xi, 1) or (ord(L.buf[pos]) - ord('0'))
-          inc(pos)
-      # 'c', 'C' is deprecated
-      of 'o', 'c', 'C':
-        result.base = base8
-        while pos < endpos:
-          if L.buf[pos] != '_':
-            xi = `shl`(xi, 3) or (ord(L.buf[pos]) - ord('0'))
-          inc(pos)
-      of 'x', 'X':
-        result.base = base16
-        while pos < endpos:
-          case L.buf[pos]
-          of '_':
-            inc(pos)
-          of '0'..'9':
-            xi = `shl`(xi, 4) or (ord(L.buf[pos]) - ord('0'))
-            inc(pos)
-          of 'a'..'f':
-            xi = `shl`(xi, 4) or (ord(L.buf[pos]) - ord('a') + 10)
+  if result.tokType != tkCustomLit:
+    # Third stage, extract actual number
+    L.bufpos = startpos            # restore position
+    var pos = startpos
+    try:
+      if (L.buf[pos] == '0') and (L.buf[pos + 1] in baseCodeChars):
+        inc(pos, 2)
+        xi = 0                  # it is a base prefix
+
+        case L.buf[pos - 1]
+        of 'b', 'B':
+          result.base = base2
+          while pos < endpos:
+            if L.buf[pos] != '_':
+              xi = `shl`(xi, 1) or (ord(L.buf[pos]) - ord('0'))
             inc(pos)
-          of 'A'..'F':
-            xi = `shl`(xi, 4) or (ord(L.buf[pos]) - ord('A') + 10)
+        # 'c', 'C' is deprecated
+        of 'o', 'c', 'C':
+          result.base = base8
+          while pos < endpos:
+            if L.buf[pos] != '_':
+              xi = `shl`(xi, 3) or (ord(L.buf[pos]) - ord('0'))
             inc(pos)
-          else:
-            break
+        of 'x', 'X':
+          result.base = base16
+          while pos < endpos:
+            case L.buf[pos]
+            of '_':
+              inc(pos)
+            of '0'..'9':
+              xi = `shl`(xi, 4) or (ord(L.buf[pos]) - ord('0'))
+              inc(pos)
+            of 'a'..'f':
+              xi = `shl`(xi, 4) or (ord(L.buf[pos]) - ord('a') + 10)
+              inc(pos)
+            of 'A'..'F':
+              xi = `shl`(xi, 4) or (ord(L.buf[pos]) - ord('A') + 10)
+              inc(pos)
+            else:
+              break
+        else:
+          internalError(L.config, getLineInfo(L), "getNumber")
+
+        case result.tokType
+        of tkIntLit, tkInt64Lit: setNumber result.iNumber, xi
+        of tkInt8Lit: setNumber result.iNumber, ashr(xi shl 56, 56)
+        of tkInt16Lit: setNumber result.iNumber, ashr(xi shl 48, 48)
+        of tkInt32Lit: setNumber result.iNumber, ashr(xi shl 32, 32)
+        of tkUIntLit, tkUInt64Lit: setNumber result.iNumber, xi
+        of tkUInt8Lit: setNumber result.iNumber, xi and 0xff
+        of tkUInt16Lit: setNumber result.iNumber, xi and 0xffff
+        of tkUInt32Lit: setNumber result.iNumber, xi and 0xffffffff
+        of tkFloat32Lit:
+          setNumber result.fNumber, (cast[PFloat32](addr(xi)))[]
+          # note: this code is endian neutral!
+          # XXX: Test this on big endian machine!
+        of tkFloat64Lit, tkFloatLit:
+          setNumber result.fNumber, (cast[PFloat64](addr(xi)))[]
+        else: internalError(L.config, getLineInfo(L), "getNumber")
+
+        # Bounds checks. Non decimal literals are allowed to overflow the range of
+        # the datatype as long as their pattern don't overflow _bitwise_, hence
+        # below checks of signed sizes against uint*.high is deliberate:
+        # (0x80'u8 = 128, 0x80'i8 = -128, etc == OK)
+        if result.tokType notin floatTypes:
+          let outOfRange =
+            case result.tokType
+            of tkUInt8Lit, tkUInt16Lit, tkUInt32Lit: result.iNumber != xi
+            of tkInt8Lit:  (xi > BiggestInt(uint8.high))
+            of tkInt16Lit: (xi > BiggestInt(uint16.high))
+            of tkInt32Lit: (xi > BiggestInt(uint32.high))
+            else: false
+
+          if outOfRange:
+            #echo "out of range num: ", result.iNumber, " vs ", xi
+            lexMessageLitNum(L, "number out of range: '$1'", startpos)
+
       else:
-        internalError(L.config, getLineInfo(L), "getNumber")
-
-      case result.tokType
-      of tkIntLit, tkInt64Lit: setNumber result.iNumber, xi
-      of tkInt8Lit: setNumber result.iNumber, ashr(xi shl 56, 56)
-      of tkInt16Lit: setNumber result.iNumber, ashr(xi shl 48, 48)
-      of tkInt32Lit: setNumber result.iNumber, ashr(xi shl 32, 32)
-      of tkUIntLit, tkUInt64Lit: setNumber result.iNumber, xi
-      of tkUInt8Lit: setNumber result.iNumber, xi and 0xff
-      of tkUInt16Lit: setNumber result.iNumber, xi and 0xffff
-      of tkUInt32Lit: setNumber result.iNumber, xi and 0xffffffff
-      of tkFloat32Lit:
-        setNumber result.fNumber, (cast[PFloat32](addr(xi)))[]
-        # note: this code is endian neutral!
-        # XXX: Test this on big endian machine!
-      of tkFloat64Lit, tkFloatLit:
-        setNumber result.fNumber, (cast[PFloat64](addr(xi)))[]
-      else: internalError(L.config, getLineInfo(L), "getNumber")
-
-      # Bounds checks. Non decimal literals are allowed to overflow the range of
-      # the datatype as long as their pattern don't overflow _bitwise_, hence
-      # below checks of signed sizes against uint*.high is deliberate:
-      # (0x80'u8 = 128, 0x80'i8 = -128, etc == OK)
-      if result.tokType notin floatTypes:
+        case result.tokType
+        of floatTypes:
+          result.fNumber = parseFloat(result.literal)
+        of tkUInt64Lit, tkUIntLit:
+          var iNumber: uint64
+          var len: int
+          try:
+            len = parseBiggestUInt(result.literal, iNumber)
+          except ValueError:
+            raise newException(OverflowDefect, "number out of range: " & $result.literal)
+          if len != result.literal.len:
+            raise newException(ValueError, "invalid integer: " & $result.literal)
+          result.iNumber = cast[int64](iNumber)
+        else:
+          var iNumber: int64
+          var len: int
+          try:
+            len = parseBiggestInt(result.literal, iNumber)
+          except ValueError:
+            raise newException(OverflowDefect, "number out of range: " & $result.literal)
+          if len != result.literal.len:
+            raise newException(ValueError, "invalid integer: " & $result.literal)
+          result.iNumber = iNumber
+
+        # Explicit bounds checks.
         let outOfRange =
           case result.tokType
-          of tkUInt8Lit, tkUInt16Lit, tkUInt32Lit: result.iNumber != xi
-          of tkInt8Lit:  (xi > BiggestInt(uint8.high))
-          of tkInt16Lit: (xi > BiggestInt(uint16.high))
-          of tkInt32Lit: (xi > BiggestInt(uint32.high))
+          of tkInt8Lit: result.iNumber > int8.high or result.iNumber < int8.low
+          of tkUInt8Lit: result.iNumber > BiggestInt(uint8.high) or result.iNumber < 0
+          of tkInt16Lit: result.iNumber > int16.high or result.iNumber < int16.low
+          of tkUInt16Lit: result.iNumber > BiggestInt(uint16.high) or result.iNumber < 0
+          of tkInt32Lit: result.iNumber > int32.high or result.iNumber < int32.low
+          of tkUInt32Lit: result.iNumber > BiggestInt(uint32.high) or result.iNumber < 0
           else: false
 
         if outOfRange:
-          #echo "out of range num: ", result.iNumber, " vs ", xi
           lexMessageLitNum(L, "number out of range: '$1'", startpos)
 
-    else:
-      case result.tokType
-      of floatTypes:
-        result.fNumber = parseFloat(result.literal)
-      of tkUInt64Lit, tkUIntLit:
-        var iNumber: uint64
-        var len: int
-        try:
-          len = parseBiggestUInt(result.literal, iNumber)
-        except ValueError:
-          raise newException(OverflowDefect, "number out of range: " & $result.literal)
-        if len != result.literal.len:
-          raise newException(ValueError, "invalid integer: " & $result.literal)
-        result.iNumber = cast[int64](iNumber)
-      else:
-        var iNumber: int64
-        var len: int
-        try:
-          len = parseBiggestInt(result.literal, iNumber)
-        except ValueError:
-          raise newException(OverflowDefect, "number out of range: " & $result.literal)
-        if len != result.literal.len:
-          raise newException(ValueError, "invalid integer: " & $result.literal)
-        result.iNumber = iNumber
-
-      # Explicit bounds checks.
-      let outOfRange =
-        case result.tokType
-        of tkInt8Lit: result.iNumber > int8.high or result.iNumber < int8.low
-        of tkUInt8Lit: result.iNumber > BiggestInt(uint8.high) or result.iNumber < 0
-        of tkInt16Lit: result.iNumber > int16.high or result.iNumber < int16.low
-        of tkUInt16Lit: result.iNumber > BiggestInt(uint16.high) or result.iNumber < 0
-        of tkInt32Lit: result.iNumber > int32.high or result.iNumber < int32.low
-        of tkUInt32Lit: result.iNumber > BiggestInt(uint32.high) or result.iNumber < 0
-        else: false
-
-      if outOfRange:
-        lexMessageLitNum(L, "number out of range: '$1'", startpos)
-
-    # Promote int literal to int64? Not always necessary, but more consistent
-    if result.tokType == tkIntLit:
-      if result.iNumber > high(int32) or result.iNumber < low(int32):
-        result.tokType = tkInt64Lit
-
-  except ValueError:
-    lexMessageLitNum(L, "invalid number: '$1'", startpos)
-  except OverflowDefect, RangeDefect:
-    lexMessageLitNum(L, "number out of range: '$1'", startpos)
+      # Promote int literal to int64? Not always necessary, but more consistent
+      if result.tokType == tkIntLit:
+        if result.iNumber > high(int32) or result.iNumber < low(int32):
+          result.tokType = tkInt64Lit
+
+    except ValueError:
+      lexMessageLitNum(L, "invalid number: '$1'", startpos)
+    except OverflowDefect, RangeDefect:
+      lexMessageLitNum(L, "number out of range: '$1'", startpos)
   tokenEnd(result, postPos-1)
   L.bufpos = postPos
 
@@ -830,8 +815,9 @@ proc getString(L: var Lexer, tok: var Token, mode: StringMode) =
         inc(pos)
     L.bufpos = pos
 
-proc getCharacter(L: var Lexer, tok: var Token) =
+proc getCharacter(L: var Lexer; tok: var Token) =
   tokenBegin(tok, L.bufpos)
+  let startPos = L.bufpos
   inc(L.bufpos)               # skip '
   var c = L.buf[L.bufpos]
   case c
@@ -842,10 +828,16 @@ proc getCharacter(L: var Lexer, tok: var Token) =
   else:
     tok.literal = $c
     inc(L.bufpos)
-  if L.buf[L.bufpos] != '\'':
-    lexMessage(L, errGenerated, "missing closing ' for character literal")
-  tokenEndIgnore(tok, L.bufpos)
-  inc(L.bufpos)               # skip '
+  if L.buf[L.bufpos] == '\'':
+    tokenEndIgnore(tok, L.bufpos)
+    inc(L.bufpos)               # skip '
+  else:
+    if startPos > 0 and L.buf[startPos-1] == '`':
+      tok.literal = "'"
+      L.bufpos = startPos+1
+    else:
+      lexMessage(L, errGenerated, "missing closing ' for character literal")
+    tokenEndIgnore(tok, L.bufpos)
 
 proc getSymbol(L: var Lexer, tok: var Token) =
   var h: Hash = 0
diff --git a/compiler/parser.nim b/compiler/parser.nim
index fe857c81b..b9a6ffb8c 100644
--- a/compiler/parser.nim
+++ b/compiler/parser.nim
@@ -355,7 +355,7 @@ proc parseSymbol(p: var Parser, mode = smNormal): PNode =
         let node = newNodeI(nkIdent, lineinfo)
         node.ident = p.lex.cache.getIdent(accm)
         result.add(node)
-      of tokKeywordLow..tokKeywordHigh, tkSymbol, tkIntLit..tkCharLit:
+      of tokKeywordLow..tokKeywordHigh, tkSymbol, tkIntLit..tkCustomLit:
         result.add(newIdentNodeP(p.lex.cache.getIdent($p.tok), p))
         getTok(p)
       else:
@@ -627,7 +627,7 @@ proc identOrLiteral(p: var Parser, mode: PrimaryMode): PNode =
   #|           | UINT_LIT | UINT8_LIT | UINT16_LIT | UINT32_LIT | UINT64_LIT
   #|           | FLOAT_LIT | FLOAT32_LIT | FLOAT64_LIT
   #|           | STR_LIT | RSTR_LIT | TRIPLESTR_LIT
-  #|           | CHAR_LIT
+  #|           | CHAR_LIT | CUSTOM_NUMERIC_LIT
   #|           | NIL
   #| generalizedLit = GENERALIZED_STR_LIT | GENERALIZED_TRIPLESTR_LIT
   #| identOrLiteral = generalizedLit | symbol | literal
@@ -710,6 +710,14 @@ proc identOrLiteral(p: var Parser, mode: PrimaryMode): PNode =
   of tkCharLit:
     result = newIntNodeP(nkCharLit, ord(p.tok.literal[0]), p)
     getTok(p)
+  of tkCustomLit:
+    let splitPos = p.tok.iNumber.int
+    let str = newStrNodeP(nkRStrLit, p.tok.literal.substr(0, splitPos-1), p)
+    let callee = newIdentNodeP(getIdent(p.lex.cache, p.tok.literal.substr(splitPos)), p)
+    result = newNodeP(nkDotExpr, p)
+    result.add str
+    result.add callee
+    getTok(p)
   of tkNil:
     result = newNodeP(nkNilLit, p)
     getTok(p)
@@ -807,7 +815,7 @@ proc primarySuffix(p: var Parser, r: PNode,
         result = commandExpr(p, result, mode)
         break
       result = namedParams(p, result, nkCurlyExpr, tkCurlyRi)
-    of tkSymbol, tkAccent, tkIntLit..tkCharLit, tkNil, tkCast,
+    of tkSymbol, tkAccent, tkIntLit..tkCustomLit, tkNil, tkCast,
        tkOpr, tkDotDot, tkVar, tkOut, tkStatic, tkType, tkEnum, tkTuple,
        tkObject, tkProc:
       # XXX: In type sections we allow the free application of the
@@ -1097,7 +1105,7 @@ proc isExprStart(p: Parser): bool =
   case p.tok.tokType
   of tkSymbol, tkAccent, tkOpr, tkNot, tkNil, tkCast, tkIf, tkFor,
      tkProc, tkFunc, tkIterator, tkBind, tkBuiltInMagics,
-     tkParLe, tkBracketLe, tkCurlyLe, tkIntLit..tkCharLit, tkVar, tkRef, tkPtr,
+     tkParLe, tkBracketLe, tkCurlyLe, tkIntLit..tkCustomLit, tkVar, tkRef, tkPtr,
      tkTuple, tkObject, tkWhen, tkCase, tkOut:
     result = true
   else: result = false
@@ -1498,7 +1506,7 @@ proc parseReturnOrRaise(p: var Parser, kind: TNodeKind): PNode =
   #| yieldStmt = 'yield' optInd expr?
   #| discardStmt = 'discard' optInd expr?
   #| breakStmt = 'break' optInd expr?
-  #| continueStmt = 'break' optInd expr?
+  #| continueStmt = 'continue' optInd expr?
   result = newNodeP(kind, p)
   getTok(p)
   if p.tok.tokType == tkComment:
diff --git a/compiler/renderer.nim b/compiler/renderer.nim
index c36eaaf11..9ca485f6e 100644
--- a/compiler/renderer.nim
+++ b/compiler/renderer.nim
@@ -942,7 +942,7 @@ proc skipHiddenNodes(n: PNode): PNode =
     else: break
 
 proc accentedName(g: var TSrcGen, n: PNode) =
-  const backticksNeeded = OpChars + {'[', '{'}
+  const backticksNeeded = OpChars + {'[', '{', '\''}
   if n == nil: return
   let isOperator =
     if n.kind == nkIdent and n.ident.s.len > 0 and n.ident.s[0] in backticksNeeded: true
@@ -976,6 +976,11 @@ proc infixArgument(g: var TSrcGen, n: PNode, i: int) =
   if needsParenthesis:
     put(g, tkParRi, ")")
 
+proc isCustomLit(n: PNode): bool =
+  n.len == 2 and n[0].kind == nkRStrLit and
+    (n[1].kind == nkIdent and n[1].ident.s.startsWith('\'')) or
+    (n[1].kind == nkSym and n[1].sym.name.s.startsWith('\''))
+
 proc gsub(g: var TSrcGen, n: PNode, c: TContext) =
   if isNil(n): return
   var
@@ -1195,9 +1200,14 @@ proc gsub(g: var TSrcGen, n: PNode, c: TContext) =
     gcomma(g, n, c)
     put(g, tkBracketRi, "]")
   of nkDotExpr:
-    gsub(g, n, 0)
-    put(g, tkDot, ".")
-    gsub(g, n, 1)
+    if isCustomLit(n):
+      put(g, tkCustomLit, n[0].strVal)
+      gsub(g, n, 1)
+    else:
+      gsub(g, n, 0)
+      put(g, tkDot, ".")
+      if n.len > 1:
+        accentedName(g, n[1])
   of nkBind:
     putWithSpace(g, tkBind, "bind")
     gsub(g, n, 0)
diff --git a/compiler/semstmts.nim b/compiler/semstmts.nim
index ff8f68ed0..fee43162e 100644
--- a/compiler/semstmts.nim
+++ b/compiler/semstmts.nim
@@ -1524,7 +1524,7 @@ proc semProcAnnotation(c: PContext, prc: PNode;
     return
 
 proc semInferredLambda(c: PContext, pt: TIdTable, n: PNode): PNode {.nosinks.} =
-  ## used for resolving 'auto' in lambdas based on their callsite 
+  ## used for resolving 'auto' in lambdas based on their callsite
   var n = n
   let original = n[namePos].sym
   let s = original #copySym(original, false)
diff --git a/doc/grammar.txt b/doc/grammar.txt
index 0d5eef179..d4f4a0515 100644
--- a/doc/grammar.txt
+++ b/doc/grammar.txt
@@ -46,7 +46,7 @@ literal = | INT_LIT | INT8_LIT | INT16_LIT | INT32_LIT | INT64_LIT
           | UINT_LIT | UINT8_LIT | UINT16_LIT | UINT32_LIT | UINT64_LIT
           | FLOAT_LIT | FLOAT32_LIT | FLOAT64_LIT
           | STR_LIT | RSTR_LIT | TRIPLESTR_LIT
-          | CHAR_LIT
+          | CHAR_LIT | CUSTOM_NUMERIC_LIT
           | NIL
 generalizedLit = GENERALIZED_STR_LIT | GENERALIZED_TRIPLESTR_LIT
 identOrLiteral = generalizedLit | symbol | literal
@@ -100,6 +100,7 @@ postExprBlocks = ':' stmt? ( IND{=} doBlock
                            | IND{=} 'of' exprList ':' stmt
                            | IND{=} 'elif' expr ':' stmt
                            | IND{=} 'except' exprList ':' stmt
+                           | IND{=} 'finally' ':' stmt
                            | IND{=} 'else' ':' stmt )*
 exprStmt = simpleExpr
          (( '=' optInd expr colonBody? )
diff --git a/doc/manual.rst b/doc/manual.rst
index db1251630..a882eb945 100644
--- a/doc/manual.rst
+++ b/doc/manual.rst
@@ -490,11 +490,17 @@ this. Another reason is that Nim can thus support `array[char, int]` or
 type is used for Unicode characters, it can represent any Unicode character.
 `Rune` is declared in the `unicode module <unicode.html>`_.
 
+A character literal that  does not end in ``'`` interpreted as ``'`` if there
+is a preceeding backtick token. There must be no whitespace between the preceeding
+backtick token and the character literal. This special rule ensures that a declaration
+like ``proc `'customLiteral`(s: string)`` is valid. See also
+`Custom Numeric Literals <#custom-numeric-literals>`_.
 
-Numerical constants
--------------------
 
-Numerical constants are of a single type and have the form::
+Numeric Literals
+----------------
+
+Numeric literals have the form::
 
   hexdigit = digit | 'A'..'F' | 'a'..'f'
   octdigit = '0'..'7'
@@ -530,8 +536,10 @@ Numerical constants are of a single type and have the form::
   FLOAT64_LIT = HEX_LIT '\'' FLOAT64_SUFFIX
               | (FLOAT_LIT | DEC_LIT | OCT_LIT | BIN_LIT) ['\''] FLOAT64_SUFFIX
 
+  CUSTOM_NUMERIC_LIT = (FLOAT_LIT | DEC_LIT | OCT_LIT | BIN_LIT) '\'' CUSTOM_NUMERIC_SUFFIX
 
-As can be seen in the productions, numerical constants can contain underscores
+
+As can be seen in the productions, numeric literals can contain underscores
 for readability. Integer and floating-point literals may be given in decimal (no
 prefix), binary (prefix `0b`), octal (prefix `0o`), and hexadecimal
 (prefix `0x`) notation.
@@ -579,7 +587,7 @@ is optional if it is not ambiguous (only hexadecimal floating-point literals
 with a type suffix can be ambiguous).
 
 
-The type suffixes are:
+The pre-defined type suffixes are:
 
 =================    =========================
   Type Suffix        Resulting type of literal
@@ -611,6 +619,42 @@ the bit width of the datatype, it is accepted.
 Hence: 0b10000000'u8 == 0x80'u8 == 128, but, 0b10000000'i8 == 0x80'i8 == -1
 instead of causing an overflow error.
 
+
+Custom Numeric Literals
+~~~~~~~~~~~~~~~~~~~~~~~
+
+If the suffix is not predefined, then the suffix is assumed to be a call
+to a proc, template, macro or other callable identifier that is passed the
+string containing the literal. The callable identifier needs to be declared
+with a special ``'`` prefix:
+
+.. code-block:: nim
+
+  import strutils
+  type u4 = distinct uint8 # a 4-bit unsigned integer aka "nibble"
+  proc `'u4`(n: string): u4 =
+    # The leading ' is required.
+    result = (parseInt(n) and 0x0F).u4
+
+  var x = 5'u4
+
+More formally, a custom numeric literal `123'custom` is transformed
+to r"123".`'custom` in the parsing step. There is no AST node kind that
+corresponds to this transformation. The transformation naturally handles
+the case that additional parameters are passed to the callee:
+
+.. code-block:: nim
+
+  import strutils
+  type u4 = distinct uint8 # a 4-bit unsigned integer aka "nibble"
+  proc `'u4`(n: string; moreData: int): u4 =
+    result = (parseInt(n) and 0x0F).u4
+
+  var x = 5'u4(123)
+
+Custom numeric literals are covered by the grammar rule named `CUSTOM_NUMERIC_LIT`.
+
+
 Operators
 ---------
 
diff --git a/tests/lexer/mlexerutils.nim b/tests/lexer/mlexerutils.nim
new file mode 100644
index 000000000..eae7a0006
--- /dev/null
+++ b/tests/lexer/mlexerutils.nim
@@ -0,0 +1,9 @@
+import macros
+
+macro lispReprStr*(a: untyped): untyped = newLit(a.lispRepr)
+
+macro assertAST*(expected: string, struct: untyped): untyped =
+  var ast = newLit(struct.treeRepr)
+  result = quote do:
+    if `ast` != `expected`:
+      doAssert false, "\nGot:\n" & `ast`.indent(2) & "\nExpected:\n" & `expected`.indent(2)
\ No newline at end of file
diff --git a/tests/lexer/tcustom_numeric_literals.nim b/tests/lexer/tcustom_numeric_literals.nim
new file mode 100644
index 000000000..a2f355b4d
--- /dev/null
+++ b/tests/lexer/tcustom_numeric_literals.nim
@@ -0,0 +1,150 @@
+discard """
+  targets: "c cpp js"
+"""
+
+# Test tkStrNumLit
+
+import std/[macros, strutils]
+import mlexerutils
+
+# AST checks
+
+assertAST dedent """
+  StmtList
+    ProcDef
+      AccQuoted
+        Ident "\'"
+        Ident "wrap"
+      Empty
+      Empty
+      FormalParams
+        Ident "string"
+        IdentDefs
+          Ident "number"
+          Ident "string"
+          Empty
+      Empty
+      Empty
+      StmtList
+        Asgn
+          Ident "result"
+          Infix
+            Ident "&"
+            Infix
+              Ident "&"
+              StrLit "[["
+              Ident "number"
+            StrLit "]]"""":
+  proc `'wrap`(number: string): string =
+    result = "[[" & number & "]]"
+
+assertAST dedent """
+  StmtList
+    DotExpr
+      RStrLit "-38383839292839283928392839283928392839283.928493849385935898243e-50000"
+      Ident "\'wrap"""":
+  -38383839292839283928392839283928392839283.928493849385935898243e-50000'wrap
+
+proc `'wrap`(number: string): string = "[[" & number & "]]"
+doAssert lispReprStr(-1'wrap) == """(DotExpr (RStrLit "-1") (Ident "\'wrap"))"""
+
+template main =
+  block: # basic suffix usage
+    template `'twrap`(number: string): untyped =
+      number.`'wrap`
+    proc extraContext(): string =
+      22.40'wrap
+    proc `*`(left, right: string): string =
+      result = left & "times" & right
+    proc `+`(left, right: string): string =
+      result = left & "plus" & right
+
+    doAssert 1'wrap == "[[1]]"
+    doAssert -1'wrap == "[[-1]]":
+      "unable to resolve a negative integer-suffix pattern"
+    doAssert 12345.67890'wrap == "[[12345.67890]]"
+    doAssert 1'wrap*1'wrap == "[[1]]times[[1]]":
+      "unable to resolve an operator between two suffixed numeric literals"
+    doAssert 1'wrap+ -1'wrap == "[[1]]plus[[-1]]":  # will generate a compiler warning about inconsistent spacing
+      "unable to resolve a negative suffixed numeric literal following an operator"
+    doAssert 1'wrap + -1'wrap == "[[1]]plus[[-1]]"
+    doAssert 1'twrap == "[[1]]"
+    doAssert extraContext() == "[[22.40]]":
+      "unable to return a suffixed numeric literal by an implicit return"
+    doAssert 0x5a3a'wrap == "[[0x5a3a]]"
+    doAssert 0o5732'wrap == "[[0o5732]]"
+    doAssert 0b0101111010101'wrap == "[[0b0101111010101]]"
+    doAssert -38383839292839283928392839283928392839283.928493849385935898243e-50000'wrap == "[[-38383839292839283928392839283928392839283.928493849385935898243e-50000]]"
+    doAssert 1234.56'wrap == "[[1234.56]]":
+      "unable to properly account for context with suffixed numeric literals"
+
+  block: # verify that the i64, f32, etc builtin suffixes still parse correctly
+    const expectedF32: float32 = 123.125
+    proc `'f9`(number: string): string =   # proc starts with 'f' just like 'f32'
+      "[[" & number & "]]"
+    proc `'f32a`(number: string): string =   # looks even more like 'f32'
+      "[[" & number & "]]"
+    proc `'d9`(number: string): string =   # proc starts with 'd' just like the d suffix
+      "[[" & number & "]]"
+    proc `'i9`(number: string): string =   # proc starts with 'i' just like 'i64'
+      "[[" & number & "]]"
+    proc `'u9`(number: string): string =   # proc starts with 'u' just like 'u8'
+      "[[" & number & "]]"
+
+    doAssert 123.125f32 == expectedF32:
+      "failing to support non-quoted legacy f32 floating point suffix"
+    doAssert 123.125'f32 == expectedF32
+    doAssert 123.125e0'f32 == expectedF32
+    doAssert 1234.56'wrap == 1234.56'f9
+    doAssert 1234.56'wrap == 1234.56'f32a
+    doAssert 1234.56'wrap == 1234.56'd9
+    doAssert 1234.56'wrap == 1234.56'i9
+    doAssert 1234.56'wrap == 1234.56'u9
+    doAssert lispReprStr(1234.56'u9) == """(DotExpr (RStrLit "1234.56") (Ident "\'u9"))""":
+      "failed to properly build AST for suffix that starts with u"
+    doAssert -128'i8 == (-128).int8
+
+  block: # case checks
+    doAssert 1E2 == 100:
+      "lexer not handling upper-case exponent"
+    doAssert 1.0E2 == 100.0
+    doAssert 1e2 == 100
+    doAssert 0xdeadBEEF'wrap == "[[0xdeadBEEF]]":
+      "lexer not maintaining original case"
+    doAssert 0.1E12'wrap == "[[0.1E12]]"
+    doAssert 0.0e12'wrap == "[[0.0e12]]"
+    doAssert 0.0e+12'wrap == "[[0.0e+12]]"
+    doAssert 0.0e-12'wrap == "[[0.0e-12]]"
+    doAssert 0e-12'wrap == "[[0e-12]]"
+
+  block: # macro and template usage
+    template `'foo`(a: string): untyped = (a, 2)
+    doAssert -12'foo == ("-12", 2)
+    template `'fooplus`(a: string, b: int): untyped = (a, b)
+    doAssert -12'fooplus(2) == ("-12", 2)
+    template `'fooplusopt`(a: string, b: int = 99): untyped = (a, b)
+    doAssert -12'fooplusopt(2) == ("-12", 2)
+    doAssert -12'fooplusopt() == ("-12", 99)
+    doAssert -12'fooplusopt == ("-12", 99)
+    macro `'bar`(a: static string): untyped =
+      var infix = newNimNode(nnkInfix)
+      infix.add newIdentNode("&")
+      infix.add newLit("got ")
+      infix.add newLit(a.repr)
+      result = newNimNode(nnkStmtList)
+      result.add infix
+    doAssert -12'bar == "got \"-12\""
+    macro deb(a): untyped = newLit(a.repr)
+    doAssert deb(-12'bar) == "-12'bar"
+    # macro metawrap(): untyped =
+    #   func wrap1(a: string): string = "{" & a & "}"
+    #   func `'wrap2`(a: string): string = "{" & a & "}"
+    #   result = quote do:
+    #     let a1 = wrap1"-128"
+    #     let a2 = -128'wrap2
+    # metawrap()
+    # doAssert a1 == "{-128}"
+    # doAssert a2 == "{-128}"
+
+static: main()
+main()
diff --git a/tests/lexer/tstrlits.nim b/tests/lexer/tstrlits.nim
deleted file mode 100644
index 8e8250a5b..000000000
--- a/tests/lexer/tstrlits.nim
+++ /dev/null
@@ -1,19 +0,0 @@
-discard """
-  output: "a\"\"long string\"\"\"\"\"abc\"def_'2'●𝌆𝌆A"
-"""
-# Test the new different string literals
-
-const
-  tripleEmpty = """"long string"""""""" # "long string """""
-
-  rawQuote = r"a"""
-
-  raw = r"abc""def"
-
-  escaped = "\x5f'\50'\u25cf\u{1D306}\u{1d306}\u{41}"
-
-
-stdout.write(rawQuote)
-stdout.write(tripleEmpty)
-stdout.write(raw)
-stdout.writeLine(escaped)
diff --git a/tests/lexer/tunary_minus.nim b/tests/lexer/tunary_minus.nim
index 89f1b79ef..0aa861d53 100644
--- a/tests/lexer/tunary_minus.nim
+++ b/tests/lexer/tunary_minus.nim
@@ -6,13 +6,7 @@ discard """
 
 import std/[macros, strutils]
 
-macro lispReprStr*(a: untyped): untyped = newLit(a.lispRepr)
-
-macro assertAST*(expected: string, struct: untyped): untyped =
-  var ast = newLit(struct.treeRepr)
-  result = quote do:
-    if `ast` != `expected`:
-      doAssert false, "\nGot:\n" & `ast`.indent(2) & "\nExpected:\n" & `expected`.indent(2)
+import mlexerutils
 
 const one = 1
 const minusOne = `-`(one)
author	Andreas Rumpf <rumpf_a@web.de>	2021-03-24 14:46:19 +0100
committer	GitHub <noreply@github.com>	2021-03-24 14:46:19 +0100
commit	5f5a92379fab59db0e7e3da5a9cec2a11be45ed8 (patch)
tree	1bce47645068dd814f992c221d14e56bb0b2c932
parent	7366a3da37605b230823dd4b6db07abb70dbd40b (diff)
download	Nim-5f5a92379fab59db0e7e3da5a9cec2a11be45ed8.tar.gz