#
#
#            Nim's Runtime Library
#        (c) Copyright 2016 Andreas Rumpf
#
#    See the file "copying.txt", included in this
#    distribution, for details about the copyright.
#

## .. note:: In order to use this module, run `nimble install punycode`.
##
## Implements a representation of Unicode with the limited
## ASCII character subset.

import strutils
import unicode

# issue #3045

const
  Base = 36
  TMin = 1
  TMax = 26
  Skew = 38
  Damp = 700
  InitialBias = 72
  InitialN = 128
  Delimiter = '-'

type
  PunyError* = object of ValueError
    ## Raised by `encode` and `decode` when the input cannot be processed.

func decodeDigit(x: char): int {.raises: [PunyError].} =
  ## Maps one ASCII character to its digit value: letters of either case
  ## give 0..25 and `'0'..'9'` give 26..35.
  ## Raises `PunyError` for any other character.
  if '0' <= x and x <= '9':
    result = ord(x) - (ord('0') - 26)
  elif 'A' <= x and x <= 'Z':
    result = ord(x) - ord('A')
  elif 'a' <= x and x <= 'z':
    result = ord(x) - ord('a')
  else:
    raise newException(PunyError, "Bad input")

func encodeDigit(digit: int): Rune {.raises: [PunyError].} =
  ## Maps a digit value to its character: 0..25 give `'a'..'z'` and
  ## 26..35 give `'0'..'9'`.
  ## Raises `PunyError` when `digit` is outside 0..35.
  if 0 <= digit and digit < 26:
    result = Rune(digit + ord('a'))
  elif 26 <= digit and digit < 36:
    result = Rune(digit + (ord('0') - 26))
  else:
    raise newException(PunyError, "internal error in punycode encoding")

func isBasic(c: char): bool = ord(c) < 0x80
  ## True when `c` is below 0x80.

func isBasic(r: Rune): bool = int(r) < 0x80
  ## True when the code point `r` is below 0x80.

func adapt(delta, numPoints: int, first: bool): int =
  ## Computes the next bias from `delta`. `delta` is divided by `Damp` when
  ## `first` is true and halved otherwise, then scaled by `numPoints`.
  var d = if first: delta div Damp else: delta div 2
  d += d div numPoints
  var k = 0
  while d > ((Base-TMin)*TMax) div 2:
    d = d div (Base - TMin)
    k += Base
  result = k + (Base - TMin + 1) * d div (d + Skew)

func encode*(prefix, s: string): string {.raises: [PunyError].} =
  ## Encode a string that may contain Unicode.
  ## Prepend `prefix` to the result.
  ##
  ## Raises `PunyError` if the running delta becomes negative or a digit
  ## outside 0..35 would have to be emitted.
  result = prefix
  var (d, n, bias) = (0, InitialN, InitialBias)
  var (b, remaining) = (0, 0)
  for r in s.runes:
    if r.isBasic:
      # basic Ascii character: copied to the output verbatim
      inc b
      result.add($r)
    else:
      # special character: handled by the loop below
      inc remaining

  var h = b # number of code points handled so far
  if b > 0:
    result.add(Delimiter) # we have some Ascii chars
  while remaining != 0:
    # Find the smallest code point that is still >= n.
    var m: int = high(int32)
    for r in s.runes:
      if m > int(r) and int(r) >= n:
        m = int(r)
    d += (m - n) * (h + 1)
    if d < 0:
      raise newException(PunyError, "invalid label " & s)
    n = m
    for r in s.runes:
      if int(r) < n:
        inc d
        if d < 0:
          raise newException(PunyError, "invalid label " & s)
        continue
      if int(r) > n:
        continue
      # int(r) == n: emit `d` as a variable-length integer.
      var q = d
      var k = Base
      while true:
        var t = k - bias
        if t < TMin:
          t = TMin
        elif t > TMax:
          t = TMax
        if q < t:
          break
        result.add($encodeDigit(t + (q - t) mod (Base - t)))
        q = (q - t) div (Base - t)
        k += Base
      result.add($encodeDigit(q))
      bias = adapt(d, h + 1, h == b)
      d = 0
      inc h
      dec remaining
    inc d
    inc n

func encode*(s: string): string {.raises: [PunyError].} =
  ## Encode a string that may contain Unicode. Prefix is empty.
  result = encode("", s)

func decode*(encoded: string): string {.raises: [PunyError].} =
  ## Decode a Punycode-encoded string.
  ##
  ## Raises `PunyError` when a character before the last delimiter is not
  ## basic, when a character is not a valid digit, when the input ends in
  ## the middle of a number, or when a decoded value exceeds `high(int32)`.
  var
    n = InitialN
    i = 0
    bias = InitialBias
  var d = rfind(encoded, Delimiter)
  var output: seq[Rune]

  # NOTE(review): the span from the copy loop below through the digit read
  # was missing from the reviewed text and is restored after RFC 3492,
  # section 6.2; confirm the two error messages in it against upstream.
  if d > 0:
    # found Delimiter: everything before it is copied verbatim
    for j in 0..<d:
      let c = encoded[j]
      if not c.isBasic:
        raise newException(PunyError, "Encoded contains a non-basic char")
      output.add(Rune(ord(c)))
    inc d # skip the delimiter itself
  else:
    d = 0 # set to first index

  while d < len(encoded):
    let oldi = i
    var w = 1
    var k = Base
    # Read one variable-length integer into `i`.
    while true:
      if d == len(encoded):
        raise newException(PunyError, "Bad input: " & encoded)
      let c = encoded[d]
      inc d
      let digit = decodeDigit(c)
      if digit > (high(int32) - i) div w:
        raise newException(PunyError, "Too large a value: " & $digit)
      i += digit * w
      let t =
        if k <= bias: TMin
        elif k >= bias + TMax: TMax
        else: k - bias
      if digit < t:
        break
      w *= Base - t
      k += Base
    bias = adapt(i - oldi, len(output) + 1, oldi == 0)
    if i div (len(output) + 1) > high(int32) - n:
      raise newException(PunyError, "Value too large")
    # `i` carries both the code point increment and the insert position.
    n += i div (len(output) + 1)
    i = i mod (len(output) + 1)
    insert(output, Rune(n), i)
    inc i

  result = $output

runnableExamples:
  static:
    block:
      doAssert encode("") == ""
      doAssert encode("a") == "a-"
      doAssert encode("A") == "A-"
      doAssert encode("3") == "3-"
      doAssert encode("-") == "--"
      doAssert encode("--") == "---"
      doAssert encode("abc") == "abc-"
      doAssert encode("London") == "London-"
      doAssert encode("Lloyd-Atkinson") == "Lloyd-Atkinson-"
      doAssert encode("This has spaces") == "This has spaces-"
      doAssert encode("ü") == "tda"
      doAssert encode("München") == "Mnchen-3ya"
      doAssert encode("Mnchen-3ya") == "Mnchen-3ya-"
      doAssert encode("München-Ost") == "Mnchen-Ost-9db"
      doAssert encode("Bahnhof München-Ost") == "Bahnhof Mnchen-Ost-u6b"
    block:
      doAssert decode("") == ""
      doAssert decode("a-") == "a"
      doAssert decode("A-") == "A"
      doAssert decode("3-") == "3"
      doAssert decode("--") == "-"
      doAssert decode("---") == "--"
      doAssert decode("abc-") == "abc"
      doAssert decode("London-") == "London"
      doAssert decode("Lloyd-Atkinson-") == "Lloyd-Atkinson"
      doAssert decode("This has spaces-") == "This has spaces"
      doAssert decode("tda") == "ü"
      doAssert decode("Mnchen-3ya") == "München"
      doAssert decode("Mnchen-3ya-") == "Mnchen-3ya"
      doAssert decode("Mnchen-Ost-9db") == "München-Ost"
      doAssert decode("Bahnhof Mnchen-Ost-u6b") == "Bahnhof München-Ost"