summary refs log tree commit diff stats
path: root/lib/pure/punycode.nim
blob: ab6501ed182a12d3db27b4b8282fac1c57514ff7 (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
pre { line-height: 125%; }
td.linenos .normal { color: inherit; background-color: transparent; padding-left: 5px; padding-right: 5px; }
span.linenos { color: inherit; background-color: transparent; padding-left: 5px; padding-right: 5px; }
td.linenos .special { color: #000000; background-color: #ffffc0; padding-left: 5px; padding-right: 5px; }
span.linenos.special { color: #000000; background-color: #ffffc0; padding-left: 5px; padding-right: 5px; }
.highlight .hll { background-color: #ffffcc }
.highlight .c { color: #888888 } /* Comment */
.highlight .err { color: #a61717; background-color: #e3d2d2 } /* Error */
.highlight .k { color: #008800; font-weight: bold } /* Keyword */
.highlight .ch { color: #888888 } /* Comment.Hashbang */
.highlight .cm { color: #888888 } /* Comment.Multiline */
.highlight .cp { color: #cc0000; font-weight: bold } /* Comment.Preproc */
.highlight .cpf { color: #888888 } /* Comment.PreprocFile */
.highlight .c1 { color: #888888 } /* Comment.Single */
.highlight .cs { color: #cc0000; font-weight: bold; background-color: #fff0f0 } /* Comment.Special */
.highlight .gd { color: #000000; background-color: #ffdddd } /* Generic.Deleted */
.highlight .ge { font-style: italic } /* Generic.Emph */
.highlight .ges { font-weight: bold; font-style: italic } /* Generic.EmphStrong */
.highlight .gr { color: #aa0000 } /* Generic.Error */
.highlight .gh { color: #333333 } /* Generic.Heading */
.highlight .gi { color: #000000; background-color: #ddffdd } /* Generic.Inserted */
.highlight .go { color: #888888 } /* Generic.Output */
.highlight .gp { color: #555555 } /* Generic.Prompt */
.highlight .gs { font-weight: bold } /* Generic.Strong */
.highlight .gu { color: #666666 } /* Generic.Subheading */
.highlight .gt { color: #aa0000 } /* Generic.Traceback */
.highlight .kc { color: #008800; font-weight: bold } /* Keyword.Constant */
.highlight .kd { color: #008800; font-weight: bold } /* Keyword.Declaration */
.highlight .kn { color: #008800; font-weight: bold } /* Keyword.Namespace */
.highlight .kp { color: #008800 } /* Keyword.Pseudo */
.highlight .kr { color: #008800; font-weight: bold } /* Keyword.Reserved */
.highlight .kt { color: #888888; font-weight: bold } /* Keyword.Type */
.highlight .m { color: #0000DD; font-weight: bold } /* Literal.Number */
.highlight .s { color: #dd2200; background-color: #fff0f0 } /* Literal.String */
.highlight .na { color: #336699 } /* Name.Attribute */
.highlight .nb { color: #003388 } /* Name.Builtin */
.highlight .nc { color: #bb0066; font-weight: bold } /* Name.Class */
.highlight .no { color: #003366; font-weight: bold } /* Name.Constant */
.highlight .nd { color: #555555 } /* Name.Decorator */
.highlight .ne { color: #bb0066; font-weight: bold } /* Name.Exception */
.highlight .nf { color: #0066bb; font-weight: bold } /* Name.Function */
.highlight .nl { color: #336699; font-style: italic } /* Name.Label */
.highlight .nn { color: #bb0066; font-weight: bold } /* Name.Namespace */
.highlight .py { color: #336699; font-weight: bold } /* Name.Property */
.highlight .nt { color: #bb0066; font-weight: bold } /* Name.Tag */
.highlight .nv { color: #336699 } /* Name.Variable */
.highlight .ow { color: #008800 } /* Operator.Word */
.highlight .w { color: #bbbbbb } /* Text.Whitespace */
.highlight .mb { color: #0000DD; font-weight: bold } /* Literal.Number.Bin */
.highlight .mf { color: #0000DD; font-weight: bold } /* Literal.Number.Float */
.highlight .mh { color: #0000DD; font-weight: bold } /* Literal.Number.Hex */
.highlight .mi { color: #0000DD; font-weight: bold } /* Literal.Number.Integer */
.highlight .mo { color: #0000DD; font-weight: bold } /* Literal.Number.Oct */
.highlight .sa { color: #dd2200; background-color: #fff0f0 } /* Literal.String.Affix */
.highlight .sb { color: #dd2200; background-color: #fff0f0 } /* Literal.String.Backtick */
.highlight .sc { color: #dd2200; background-color: #fff0f0 } /* Literal.String.Char */
.highlight .dl { color: #dd2200; background-color: #fff0f0 } /* Literal.String.Delimiter */
.highlight .sd { color: #dd2200; background-color: #fff0f0 } /* Literal.String.Doc */
.highlight .s2 { color: #dd2200; background-color: #fff0f0 } /* Literal.String.Double */
.highlight .se { color: #0044dd; background-color: #fff0f0 } /* Literal.String.Escape */
.highlight .sh { color: #dd2200; background-color: #fff0f0 } /* Literal.String.Heredoc */
.highlight .si { color: #3333bb; background-color: #fff0f0 } /* Literal.String.Interpol */
.highlight .sx { color: #22bb22; background-color: #f0fff0 } /* Literal.String.Other */
.highlight .sr { color: #008800; background-color: #fff0ff } /* Literal.String.Regex */
.highlight .s1 { color: #dd2200; background-color: #fff0f0 } /* Literal.String.Single */
.highlight .ss { color: #aa6600; background-color: #fff0f0 } /* Literal.String.Symbol */
.highlight .bp { color: #003388 } /* Name.Builtin.Pseudo */
.highlight .fm { color: #0066bb; font-weight: bold } /* Name.Function.Magic */
.highlight .vc { color: #336699 } /* Name.Variable.Class */
.highlight .vg { color: #dd7700 } /* Name.Variable.Global */
.highlight .vi {
#
#
#            Nim's Runtime Library
#        (c) Copyright 2016 Andreas Rumpf
#
#    See the file "copying.txt", included in this
#    distribution, for details about the copyright.
#

import strutils
import unicode

# issue #3045

const
  # Tuning parameters of the Punycode algorithm implemented in this module.
  # The values are the ones listed for Punycode in RFC 3492, section 5.

  # Number of distinct digits: 'a'..'z' map to 0..25, '0'..'9' to 26..35.
  Base = 36
  # Lower bound of the per-digit threshold `t`.
  TMin = 1
  # Upper bound of the per-digit threshold `t`.
  TMax = 26
  # Added to the scaled delta in the final division of `adapt`.
  Skew = 38
  # Divisor applied to delta by `adapt` when its `first` flag is set.
  Damp = 700
  # Bias in effect before the first call to `adapt`.
  InitialBias = 72
  # Starting value of `n`; 128 (0x80) is the first code point that
  # `isBasic` rejects.
  InitialN = 128
  # Separates the literally copied basic characters from the encoded digits.
  Delimiter = '-'

type
  # Raised by `encode` and `decode` when the input cannot be processed.
  # Derives from `ValueError` (a catchable error) instead of the bare
  # `Exception` root; handlers written for `PunyError` or `Exception`
  # continue to match.
  PunyError* = object of ValueError

proc decodeDigit(x: char): int {.raises: [PunyError].} =
  ## Translate a single Punycode digit character into its numeric value.
  ##
  ## Letters of either case yield 0..25 and the decimal digits yield
  ## 26..35. Any other character raises `PunyError`.
  case x
  of 'a'..'z':
    result = ord(x) - ord('a')
  of 'A'..'Z':
    result = ord(x) - ord('A')
  of '0'..'9':
    # '0' is digit value 26, '9' is 35.
    result = ord(x) - ord('0') + 26
  else:
    raise newException(PunyError, "Bad input")

proc encodeDigit(digit: int): Rune {.raises: [PunyError].} =
  ## Translate a digit value into the character that represents it.
  ##
  ## Values 0..25 become 'a'..'z' and values 26..35 become '0'..'9'.
  ## A value outside 0..35 raises `PunyError`.
  case digit
  of 0..25:
    result = Rune(digit + ord('a'))
  of 26..35:
    result = Rune(digit - 26 + ord('0'))
  else:
    raise newException(PunyError, "internal error in punycode encoding")

proc isBasic(c: char): bool =
  ## True when the byte `c` has a value below 0x80.
  ord(c) <= 0x7F

proc isBasic(r: Rune): bool =
  ## True when the code point `r` has a value below 0x80.
  int(r) <= 0x7F

proc adapt(delta, numPoints: int, first: bool): int =
  ## Compute the bias to use for the next encoded or decoded delta.
  ##
  ## `delta` is divided by `Damp` when `first` is true and halved
  ## otherwise, then increased by its own share per `numPoints`. It is
  ## then repeatedly divided by `Base - TMin` while it exceeds the
  ## threshold; each division adds `Base` to the result.
  const threshold = ((Base - TMin) * TMax) div 2
  var scaled =
    if first: delta div Damp
    else: delta div 2
  scaled += scaled div numPoints
  # `result` starts at zero and accumulates `Base` once per reduction.
  while scaled > threshold:
    scaled = scaled div (Base - TMin)
    result += Base
  result += ((Base - TMin + 1) * scaled) div (scaled + Skew)

proc encode*(prefix, s: string): string {.raises: [PunyError].} =
  ## Encode `s`, which may contain Unicode, as Punycode and return it
  ## with `prefix` prepended.
  ##
  ## Basic (below 0x80) characters of `s` are copied first, followed by
  ## the delimiter if there was at least one of them; the remaining
  ## characters are then emitted as variable-length digit sequences.
  ## Raises `PunyError` if the running delta becomes negative or a digit
  ## cannot be encoded.
  result = prefix
  var
    delta = 0
    n = InitialN
    bias = InitialBias
    basicCount = 0 # characters copied verbatim
    pending = 0    # non-basic characters still to be encoded

  for r in s.runes:
    if r.isBasic:
      result.add($r)
      inc basicCount
    else:
      inc pending

  # Number of characters of `s` already represented in the output.
  var handled = basicCount
  if basicCount > 0:
    result.add(Delimiter) # we have some Ascii chars

  while pending != 0:
    # Find the smallest code point that has not been handled yet (>= n).
    var smallest: int = high(int32)
    for r in s.runes:
      let cp = int(r)
      if cp >= n and cp < smallest:
        smallest = cp

    # Advance the (n, position) state to the start of `smallest`.
    delta += (smallest - n) * (handled + 1)
    if delta < 0:
      raise newException(PunyError, "invalid label " & s)
    n = smallest

    for r in s.runes:
      let cp = int(r)
      if cp < n:
        # Already handled character: it only advances the delta.
        inc delta
        if delta < 0:
          raise newException(PunyError, "invalid label " & s)
      elif cp == n:
        # Emit `delta` as a variable-length integer.
        var q = delta
        var k = Base
        while true:
          # Threshold for this digit position, clamped to TMin..TMax.
          let t = max(TMin, min(TMax, k - bias))
          if q < t:
            break
          result.add($encodeDigit(t + (q - t) mod (Base - t)))
          q = (q - t) div (Base - t)
          k += Base
        result.add($encodeDigit(q))
        bias = adapt(delta, handled + 1, handled == basicCount)
        delta = 0
        inc handled
        dec pending
    inc delta
    inc n

proc encode*(s: string): string {.raises: [PunyError].} =
  ## Encode `s`, which may contain Unicode, using an empty prefix.
  encode("", s)

proc decode*(encoded: string): string {.raises: [PunyError].} =
  ## Decode a Punycode-encoded string and return it as UTF-8.
  ##
  ## Everything before the last `Delimiter` is copied literally and must
  ## consist of basic (below 0x80) characters; everything after it is
  ## read as a series of variable-length integers, each of which inserts
  ## one code point into the output.
  ##
  ## Raises `PunyError` if a non-basic character precedes the delimiter,
  ## if a character is not a valid digit, if the input ends in the middle
  ## of an integer, or if a decoded value gets too large.
  var
    n = InitialN
    i = 0
    bias = InitialBias
    d = rfind(encoded, Delimiter)

  # The output is assembled as a sequence of runes. The insertion index
  # `i` counts code points, so inserting into a UTF-8 string at byte
  # offset `i` would land inside a multi-byte character as soon as a
  # non-ASCII rune precedes the insertion point.
  var output: seq[Rune] = @[]

  if d > 0:
    # found Delimiter: copy the literal part in front of it
    for j in 0..<d:
      let c = encoded[j]
      if not c.isBasic:
        raise newException(PunyError, "Encoded contains a non-basic char")
      output.add(Rune(ord(c)))
    inc d # step over the delimiter itself
  else:
    d = 0 # no literal part; digits start at the first index

  while d < len(encoded):
    let oldi = i
    var w = 1
    var k = Base
    # Read one variable-length integer into `i`.
    while true:
      if d == len(encoded):
        raise newException(PunyError, "Bad input: " & encoded)
      let c = encoded[d]
      inc d
      let digit = decodeDigit(c)
      if digit > (high(int32) - i) div w:
        raise newException(PunyError, "Too large a value: " & $digit)
      i += digit * w
      # Threshold for this digit position, clamped to TMin..TMax.
      let t =
        if k <= bias: TMin
        elif k >= bias + TMax: TMax
        else: k - bias
      if digit < t:
        break
      w *= Base - t
      k += Base

    # Length of the output once the next code point has been inserted.
    let outLen = len(output) + 1
    bias = adapt(i - oldi, outLen, oldi == 0)

    if i div outLen > high(int32) - n:
      raise newException(PunyError, "Value too large")

    # `i` wraps around the output: the quotient advances the code point,
    # the remainder is the position at which it is inserted.
    n += i div outLen
    i = i mod outLen
    insert(output, Rune(n), i)
    inc i

  result = $output

when isMainModule:
  # Self-test, run only when this module is compiled as the main program.
  # Round trip through the two-argument `encode` with an empty prefix.
  assert(decode(encode("", "bücher")) == "bücher")
  # Round trip through the one-argument `encode`.
  assert(decode(encode("münchen")) == "münchen")
  # The prefix is prepended verbatim in front of the encoded label.
  assert encode("xn--", "münchen") == "xn--mnchen-3ya"