diff options
author | bptato <nincsnevem662@gmail.com> | 2021-12-29 02:25:40 +0100 |
---|---|---|
committer | bptato <nincsnevem662@gmail.com> | 2021-12-29 02:25:40 +0100 |
commit | 53f9d91dd6377c4f689e2b237d4a25c33c1d9e16 (patch) | |
tree | 0bd48aeea9c8b0f4ddf0531a8f3a7df02f6124e5 /src/utils/twtstr.nim | |
parent | b7f19942e3d0a4cf25981c1686ffdb200ec4fd6f (diff) | |
download | chawan-53f9d91dd6377c4f689e2b237d4a25c33c1d9e16.tar.gz |
Add punycode support
Diffstat (limited to 'src/utils/twtstr.nim')
-rw-r--r-- | src/utils/twtstr.nim | 96 |
1 files changed, 84 insertions, 12 deletions
diff --git a/src/utils/twtstr.nim b/src/utils/twtstr.nim index 5fb45111..6d42134b 100644 --- a/src/utils/twtstr.nim +++ b/src/utils/twtstr.nim @@ -8,6 +8,9 @@ import math import sugar import sequtils import options +import punycode + +import data/idna when defined(posix): import posix @@ -104,7 +107,7 @@ func findChar*(str: string, c: Rune, start: int = 0): int = func getLowerChars*(): string = result = "" for i in 0..255: - if chr(i) >= 'A' and chr(i) <= 'Z': + if chr(i) in 'A'..'Z': result &= chr(i + 32) else: result &= chr(i) @@ -114,6 +117,11 @@ const lowerChars = getLowerChars() func tolower*(c: char): char = return lowerChars[int(c)] +func toAsciiLower*(str: string): string = + result = newString(str.len) + for i in 0..str.high: + result[i] = str[i].tolower() + func getrune(s: string): Rune = return s.toRunes()[0] @@ -165,8 +173,8 @@ func decValue*(r: Rune): int = const HexChars = "0123456789ABCDEF" func toHex*(c: char): string = result = newString(2) - result[0] = HexChars[(int8(c) and 0xF)] - result[1] = HexChars[(int8(c) shr 4)] + result[0] = HexChars[(uint8(c) and 0xF)] + result[1] = HexChars[(uint8(c) shr 4)] func equalsIgnoreCase*(s1: seq[Rune], s2: string): bool = var i = 0 @@ -449,13 +457,79 @@ func clearControls*(s: string): string = if c notin Controls: result &= c -#TODO ugh this'll take a while to implement properly -func domainToAscii*(domain: string): Option[string] = - result = some("") - for c in domain: - if c notin Ascii: - return none(string) - result.get &= c +func processIdna(str: string, checkhyphens, checkbidi, checkjoiners, transitionalprocessing: bool): Option[string] = + var mapped = "" + var i = 0 + while i < str.len: + var r: Rune + fastRuneAt(str, i, r) + let status = getIdnaTableStatus(r) + case status + of IDNA_DISALLOWED: return none(string) #error + of IDNA_IGNORED: discard + of IDNA_MAPPED: mapped &= getIdnaMapped(r) + of IDNA_DEVIATION: + if transitionalprocessing: mapped &= getDeviationMapped(r) + else: mapped &= r + of IDNA_VALID: mapped &= r + + #TODO normalize + var labels: seq[string] + for label in str.split('.'): + var s = label + if label.startsWith("xn--"): + try: + s = punycode.decode(label) + except PunyError: + return none(string) #error + #TODO check normalization + if checkhyphens: + if s.len >= 4 and s[2] == '-' and s[3] == '-': + return none(string) #error + if s.len > 0 and s[0] == '-' and s[^1] == '-': + return none(string) #error + var i = 0 + while i < s.len: + if s[i] == '.': + return none(string) #error + var r: Rune + fastRuneAt(str, i, r) + #TODO check general category mark + let status = getIdnaTableStatus(r) + case status + of IDNA_DISALLOWED, IDNA_IGNORED, IDNA_MAPPED: + return none(string) #error + of IDNA_DEVIATION: + if transitionalprocessing: + return none(string) #error + of IDNA_VALID: discard + #TODO check joiners + #TODO check bidi + labels.add(s) + return labels.join('.').some + +func unicodeToAscii*(s: string, checkhyphens, checkbidi, checkjoiners, transitionalprocessing, verifydnslength: bool): Option[string] = + let processed = s.processIdna(checkhyphens, checkbidi, checkjoiners, + transitionalprocessing) + if processed.isnone: + return none(string) #error + var labels: seq[string] + for label in processed.get.split('.'): + var needsconversion = false + for c in label: + if c notin Ascii: + needsconversion = true + break + if needsconversion: + try: + let converted = "xn--" & punycode.encode(label) + #TODO verify dns length + labels.add(converted) + except PunyError: + return none(string) #error + else: + labels.add(label) + return labels.join('.').some proc expandPath*(path: string): string = if path.len == 0: @@ -672,8 +746,6 @@ const ambiguous = [ # variant might be useful for users of CJK legacy encodings who want to migrate # to UCS without changing the traditional terminal character-width behaviour. # It is not otherwise recommended for general use. -# -# TODO: currently these are unused, the user should be able to toggle them # auxiliary function for binary search in interval table func bisearch(ucs: Rune, table: openarray[(int, int)]): bool = |