# # # Nim's Runtime Library # (c) Copyright 2012 Andreas Rumpf # # See the file "copying.txt", included in this # distribution, for details about the copyright. # ## This module provides support to handle the Unicode UTF-8 encoding. ## ## There are no specialized ``insert``, ``delete``, ``add`` and ``contains`` ## procedures for ``seq[Rune]`` in this module because the generic variants ## of these procedures in the system module already work with it. ## ## The current version is compatible with Unicode v12.0.0. ## ## **See also:** ## * `strutils module `_ ## * `unidecode module `_ ## * `encodings module `_ include "system/inclrtl" type RuneImpl = int32 # underlying type of Rune Rune* = distinct RuneImpl ## \ ## Type that can hold a single Unicode code point. ## ## A Rune may be composed with other Runes to a character on the screen. ## `RuneImpl` is the underlying type used to store Runes, currently `int32`. template ones(n: untyped): untyped = ((1 shl n)-1) proc runeLen*(s: string): int {.rtl, extern: "nuc$1".} = ## Returns the number of runes of the string ``s``. runnableExamples: let a = "añyóng" doAssert a.runeLen == 6 ## note: a.len == 8 var i = 0 while i < len(s): if uint(s[i]) <= 127: inc(i) elif uint(s[i]) shr 5 == 0b110: inc(i, 2) elif uint(s[i]) shr 4 == 0b1110: inc(i, 3) elif uint(s[i]) shr 3 == 0b11110: inc(i, 4) elif uint(s[i]) shr 2 == 0b111110: inc(i, 5) elif uint(s[i]) shr 1 == 0b1111110: inc(i, 6) else: inc i inc(result) proc runeLenAt*(s: string, i: Natural): int = ## Returns the number of bytes the rune starting at ``s[i]`` takes. 
## ## See also: ## * `fastRuneAt template <#fastRuneAt.t,string,int,untyped>`_ runnableExamples: let a = "añyóng" doAssert a.runeLenAt(0) == 1 doAssert a.runeLenAt(1) == 2 if uint(s[i]) <= 127: result = 1 elif uint(s[i]) shr 5 == 0b110: result = 2 elif uint(s[i]) shr 4 == 0b1110: result = 3 elif uint(s[i]) shr 3 == 0b11110: result = 4 elif uint(s[i]) shr 2 == 0b111110: result = 5 elif uint(s[i]) shr 1 == 0b1111110: result = 6 else: result = 1 const replRune = Rune(0xFFFD) template fastRuneAt*(s: string, i: int, result: untyped, doInc = true) = ## Returns the rune ``s[i]`` in ``result``. ## ## If ``doInc == true`` (default), ``i`` is incremented by the number ## of bytes that have been processed. bind ones if uint(s[i]) <= 127: result = Rune(uint(s[i])) when doInc: inc(i) elif uint(s[i]) shr 5 == 0b110: # assert(uint(s[i+1]) shr 6 == 0b10) if i <= s.len - 2: result = Rune((uint(s[i]) and (ones(5))) shl 6 or (uint(s[i+1]) and ones(6))) when doInc: inc(i, 2) else: result = replRune when doInc: inc(i) elif uint(s[i]) shr 4 == 0b1110: # assert(uint(s[i+1]) shr 6 == 0b10) # assert(uint(s[i+2]) shr 6 == 0b10) if i <= s.len - 3: result = Rune((uint(s[i]) and ones(4)) shl 12 or (uint(s[i+1]) and ones(6)) shl 6 or (uint(s[i+2]) and ones(6))) when doInc: inc(i, 3) else: result = replRune when doInc: inc(i) elif uint(s[i]) shr 3 == 0b11110: # assert(uint(s[i+1]) shr 6 == 0b10) # assert(uint(s[i+2]) shr 6 == 0b10) # assert(uint(s[i+3]) shr 6 == 0b10) if i <= s.len - 4: result = Rune((uint(s[i]) and ones(3)) shl 18 or (uint(s[i+1]) and ones(6)) shl 12 or (uint(s[i+2]) and ones(6)) shl 6 or (uint(s[i+3]) and ones(6))) when doInc: inc(i, 4) else: result = replRune when doInc: inc(i) elif uint(s[i]) shr 2 == 0b111110: # assert(uint(s[i+1]) shr 6 == 0b10) # assert(uint(s[i+2]) shr 6 == 0b10) # assert(uint(s[i+3]) shr 6 == 0b10) # assert(uint(s[i+4]) shr 6 == 0b10) if i <= s.len - 5: result = Rune((uint(s[i]) and ones(2)) shl 24 or (uint(s[i+1]) and ones(6)) shl 18 or (uint(s[i+2]) 
and ones(6)) shl 12 or (uint(s[i+3]) and ones(6)) shl 6 or (uint(s[i+4]) and ones(6))) when doInc: inc(i, 5) else: result = replRune when doInc: inc(i) elif uint(s[i]) shr 1 == 0b1111110: # assert(uint(s[i+1]) shr 6 == 0b10) # assert(uint(s[i+2]) shr 6 == 0b10) # assert(uint(s[i+3]) shr 6 == 0b10) # assert(uint(s[i+4]) shr 6 == 0b10) # assert(uint(s[i+5]) shr 6 == 0b10) if i <= s.len - 6: result = Rune((uint(s[i]) and ones(1)) shl 30 or (uint(s[i+1]) and ones(6)) shl 24 or (uint(s[i+2]) and ones(6)) shl 18 or (uint(s[i+3]) and ones(6)) shl 12 or (uint(s[i+4]) and ones(6)) shl 6 or (uint(s[i+5]) and ones(6))) when doInc: inc(i, 6) else: result = replRune when doInc: inc(i) else: result = Rune(uint(s[i])) when doInc: inc(i) proc runeAt*(s: string, i: Natural): Rune = ## Returns the rune in ``s`` at **byte index** ``i``. ## ## See also: ## * `runeAtPos proc <#runeAtPos,string,int>`_ ## * `runeStrAtPos proc <#runeStrAtPos,string,Natural>`_ ## * `fastRuneAt template <#fastRuneAt.t,string,int,untyped>`_ runnableExamples: let a = "añyóng" doAssert a.runeAt(1) == "ñ".runeAt(0) doAssert a.runeAt(2) == "ñ".runeAt(1) doAssert a.runeAt(3) == "y".runeAt(0) fastRuneAt(s, i, result, false) proc validateUtf8*(s: string): int = ## Returns the position of the invalid byte in ``s`` if the string ``s`` does ## not hold valid UTF-8 data. Otherwise ``-1`` is returned. ## ## See also: ## * `toUTF8 proc <#toUTF8,Rune>`_ ## * `$ proc <#$,Rune>`_ alias for `toUTF8` ## * `fastToUTF8Copy template <#fastToUTF8Copy.t,Rune,string,int>`_ var i = 0 let L = s.len while i < L: if uint(s[i]) <= 127: inc(i) elif uint(s[i]) shr 5 == 0b110: if uint(s[i]) < 0xc2: return i # Catch overlong ascii representations. 
if i+1 < L and uint(s[i+1]) shr 6 == 0b10: inc(i, 2) else: return i elif uint(s[i]) shr 4 == 0b1110: if i+2 < L and uint(s[i+1]) shr 6 == 0b10 and uint(s[i+2]) shr 6 == 0b10: inc i, 3 else: return i elif uint(s[i]) shr 3 == 0b11110: if i+3 < L and uint(s[i+1]) shr 6 == 0b10 and uint(s[i+2]) shr 6 == 0b10 and uint(s[i+3]) shr 6 == 0b10: inc i, 4 else: return i else: return i return -1 template fastToUTF8Copy*(c: Rune, s: var string, pos: int, doInc = true) = ## Copies UTF-8 representation of ``c`` into the preallocated string ``s`` ## starting at position ``pos``. ## ## If ``doInc == true`` (default), ``pos`` is incremented ## by the number of bytes that have been processed. ## ## To be the most efficient, make sure ``s`` is preallocated ## with an additional amount equal to the byte length of ``c``. ## ## See also: ## * `validateUtf8 proc <#validateUtf8,string>`_ ## * `toUTF8 proc <#toUTF8,Rune>`_ ## * `$ proc <#$,Rune>`_ alias for `toUTF8` var i = RuneImpl(c) if i <=% 127: s.setLen(pos+1) s[pos+0] = chr(i) when doInc: inc(pos) elif i <=% 0x07FF: s.setLen(pos+2) s[pos+0] = chr((i shr 6) or 0b110_00000) s[pos+1] = chr((i and ones(6)) or 0b10_0000_00) when doInc: inc(pos, 2) elif i <=% 0xFFFF: s.setLen(pos+3) s[pos+0] = chr(i shr 12 or 0b1110_0000) s[pos+1] = chr(i shr 6 and ones(6) or 0b10_0000_00) s[pos+2] = chr(i and ones(6) or 0b10_0000_00) when doInc: inc(pos, 3) elif i <=% 0x001FFFFF: s.setLen(pos+4) s[pos+0] = chr(i shr 18 or 0b1111_0000) s[pos+1] = chr(i shr 12 and ones(6) or 0b10_0000_00) s[pos+2] = chr(i shr 6 and ones(6) or 0b10_0000_00) s[pos+3] = chr(i and ones(6) or 0b10_0000_00) when doInc: inc(pos, 4) elif i <=% 0x03FFFFFF: s.setLen(pos+5) s[pos+0] = chr(i shr 24 or 0b111110_00) s[pos+1] = chr(i shr 18 and ones(6) or 0b10_0000_00) s[pos+2] = chr(i shr 12 and ones(6) or 0b10_0000_00) s[pos+3] = chr(i shr 6 and ones(6) or 0b10_0000_00) s[pos+4] = chr(i and ones(6) or 0b10_0000_00) when doInc: inc(pos, 5) elif i <=% 0x7FFFFFFF: s.setLen(pos+6) s[pos+0] = 
chr(i shr 30 or 0b1111110_0) s[pos+1] = chr(i shr 24 and ones(6) or 0b10_0000_00) s[pos+2] = chr(i shr 18 and ones(6) or 0b10_0000_00) s[pos+3] = chr(i shr 12 and ones(6) or 0b10_0000_00) s[pos+4] = chr(i shr 6 and ones(6) or 0b10_0000_00) s[pos+5] = chr(i and ones(6) or 0b10_0000_00) when doInc: inc(pos, 6) else: discard # error, exception? proc toUTF8*(c: Rune): string {.rtl, extern: "nuc$1".} = ## Converts a rune into its UTF-8 representation. ## ## See also: ## * `validateUtf8 proc <#validateUtf8,string>`_ ## * `$ proc <#$,Rune>`_ alias for `toUTF8` ## * `utf8 iterator <#utf8.i,string>`_ ## * `fastToUTF8Copy template <#fastToUTF8Copy.t,Rune,string,int>`_ runnableExamples: let a = "añyóng" doAssert a.runeAt(1).toUTF8 == "ñ" result = "" fastToUTF8Copy(c, result, 0, false) proc add*(s: var string; c: Rune) = ## Adds a rune ``c`` to a string ``s``. runnableExamples: var s = "abc" let c = "ä".runeAt(0) s.add(c) doAssert s == "abcä" let pos = s.len fastToUTF8Copy(c, s, pos, false) proc `$`*(rune: Rune): string = ## An alias for `toUTF8 <#toUTF8,Rune>`_. ## ## See also: ## * `validateUtf8 proc <#validateUtf8,string>`_ ## * `fastToUTF8Copy template <#fastToUTF8Copy.t,Rune,string,int>`_ rune.toUTF8 proc `$`*(runes: seq[Rune]): string = ## Converts a sequence of Runes to a string. ## ## See also: ## * `toRunes <#toRunes,string>`_ for a reverse operation runnableExamples: let someString = "öÑ" someRunes = toRunes(someString) doAssert $someRunes == someString result = "" for rune in runes: result.add rune proc runeOffset*(s: string, pos: Natural, start: Natural = 0): int = ## Returns the byte position of rune ## at position ``pos`` in ``s`` with an optional start byte position. ## Returns the special value -1 if it runs out of the string. ## ## **Beware:** This can lead to unoptimized code and slow execution! ## Most problems can be solved more efficiently by using an iterator ## or conversion to a seq of Rune. 
## ## See also: ## * `runeReverseOffset proc <#runeReverseOffset,string,Positive>`_ runnableExamples: let a = "añyóng" doAssert a.runeOffset(1) == 1 doAssert a.runeOffset(3) == 4 doAssert a.runeOffset(4) == 6 var i = 0 o = start while i < pos: o += runeLenAt(s, o) if o >= s.len: return -1 inc i return o proc runeReverseOffset*(s: string, rev: Positive): (int, int) = ## Returns a tuple with the byte offset of the ## rune at position ``rev`` in ``s``, counting ## from the end (starting with 1) and the total ## number of runes in the string. ## ## Returns a negative value for offset if there are to few runes in ## the string to satisfy the request. ## ## **Beware:** This can lead to unoptimized code and slow execution! ## Most problems can be solved more efficiently by using an iterator ## or conversion to a seq of Rune. ## ## See also: ## * `runeOffset proc <#runeOffset,string,Natural,Natural>`_ var a = rev.int o = 0 x = 0 while o < s.len: let r = runeLenAt(s, o) o += r if a < 0: x += r dec a if a > 0: return (-a, rev.int-a) return (x, -a+rev.int) proc runeAtPos*(s: string, pos: int): Rune = ## Returns the rune at position ``pos``. ## ## **Beware:** This can lead to unoptimized code and slow execution! ## Most problems can be solved more efficiently by using an iterator ## or conversion to a seq of Rune. ## ## See also: ## * `runeAt proc <#runeAt,string,Natural>`_ ## * `runeStrAtPos proc <#runeStrAtPos,string,Natural>`_ ## * `fastRuneAt template <#fastRuneAt.t,string,int,untyped>`_ fastRuneAt(s, runeOffset(s, pos), result, false) proc runeStrAtPos*(s: string, pos: Natural): string = ## Returns the rune at position ``pos`` as UTF8 String. ## ## **Beware:** This can lead to unoptimized code and slow execution! ## Most problems can be solved more efficiently by using an iterator ## or conversion to a seq of Rune. 
## ## See also: ## * `runeAt proc <#runeAt,string,Natural>`_ ## * `runeAtPos proc <#runeAtPos,string,int>`_ ## * `fastRuneAt template <#fastRuneAt.t,string,int,untyped>`_ let o = runeOffset(s, pos) s[o .. (o+runeLenAt(s, o)-1)] proc runeSubStr*(s: string, pos: int, len: int = int.high): string = ## Returns the UTF-8 substring starting at code point ``pos`` ## with ``len`` code points. ## ## If ``pos`` or ``len`` is negative they count from ## the end of the string. If ``len`` is not given it means the longest ## possible string. runnableExamples: let s = "Hänsel ««: 10,00€" doAssert(runeSubStr(s, 0, 2) == "Hä") doAssert(runeSubStr(s, 10, 1) == ":") doAssert(runeSubStr(s, -6) == "10,00€") doAssert(runeSubStr(s, 10) == ": 10,00€") doAssert(runeSubStr(s, 12, 5) == "10,00") doAssert(runeSubStr(s, -6, 3) == "10,") if pos < 0: let (o, rl) = runeReverseOffset(s, -pos) if len >= rl: result = s.substr(o, s.len-1) elif len < 0: let e = rl + len if e < 0: result = "" else: result = s.substr(o, runeOffset(s, e-(rl+pos), o)-1) else: result = s.substr(o, runeOffset(s, len, o)-1) else: let o = runeOffset(s, pos) if o < 0: result = "" elif len == int.high: result = s.substr(o, s.len-1) elif len < 0: let (e, rl) = runeReverseOffset(s, -len) discard rl if e <= 0: result = "" else: result = s.substr(o, e-1) else: var e = runeOffset(s, len, o) if e < 0: e = s.len result = s.substr(o, e-1) proc `<=%`*(a, b: Rune): bool = ## Checks if code point of `a` is smaller or equal to code point of `b`. runnableExamples: let a = "ú".runeAt(0) b = "ü".runeAt(0) doAssert a <=% b return int(a) <=% int(b) proc `<%`*(a, b: Rune): bool = ## Checks if code point of `a` is smaller than code point of `b`. runnableExamples: let a = "ú".runeAt(0) b = "ü".runeAt(0) doAssert a <% b return int(a) <% int(b) proc `==`*(a, b: Rune): bool = ## Checks if two runes are equal. 
return int(a) == int(b) include "includes/unicode_ranges" proc binarySearch(c: RuneImpl, tab: openArray[int], len, stride: int): int = var n = len var t = 0 while n > 1: var m = n div 2 var p = t + m*stride if c >= tab[p]: t = p n = n-m else: n = m if n != 0 and c >= tab[t]: return t return -1 proc toLower*(c: Rune): Rune {.rtl, extern: "nuc$1".} = ## Converts ``c`` into lower case. This works for any rune. ## ## If possible, prefer ``toLower`` over ``toUpper``. ## ## See also: ## * `toUpper proc <#toUpper,Rune>`_ ## * `toTitle proc <#toTitle,Rune>`_ ## * `isLower proc <#isLower,Rune>`_ var c = RuneImpl(c) var p = binarySearch(c, toLowerRanges, len(toLowerRanges) div 3, 3) if p >= 0 and c >= toLowerRanges[p] and c <= toLowerRanges[p+1]: return Rune(c + toLowerRanges[p+2] - 500) p = binarySearch(c, toLowerSinglets, len(toLowerSinglets) div 2, 2) if p >= 0 and c == toLowerSinglets[p]: return Rune(c + toLowerSinglets[p+1] - 500) return Rune(c) proc toUpper*(c: Rune): Rune {.rtl, extern: "nuc$1".} = ## Converts ``c`` into upper case. This works for any rune. ## ## If possible, prefer ``toLower`` over ``toUpper``. ## ## See also: ## * `toLower proc <#toLower,Rune>`_ ## * `toTitle proc <#toTitle,Rune>`_ ## * `isUpper proc <#isUpper,Rune>`_ var c = RuneImpl(c) var p = binarySearch(c, toUpperRanges, len(toUpperRanges) div 3, 3) if p >= 0 and c >= toUpperRanges[p] and c <= toUpperRanges[p+1]: return Rune(c + toUpperRanges[p+2] - 500) p = binarySearch(c, toUpperSinglets, len(toUpperSinglets) div 2, 2) if p >= 0 and c == toUpperSinglets[p]: return Rune(c + toUpperSinglets[p+1] - 500) return Rune(c) proc toTitle*(c: Rune): Rune {.rtl, extern: "nuc$1".} = ## Converts ``c`` to title case. 
## ## See also: ## * `toLower proc <#toLower,Rune>`_ ## * `toUpper proc <#toUpper,Rune>`_ ## * `isTitle proc <#isTitle,Rune>`_ var c = RuneImpl(c) var p = binarySearch(c, toTitleSinglets, len(toTitleSinglets) div 2, 2) if p >= 0 and c == toTitleSinglets[p]: return Rune(c + toTitleSinglets[p+1] - 500) return Rune(c) proc isLower*(c: Rune): bool {.rtl, extern: "nuc$1".} = ## Returns true if ``c`` is a lower case rune. ## ## If possible, prefer ``isLower`` over ``isUpper``. ## ## See also: ## * `toLower proc <#toLower,Rune>`_ ## * `isUpper proc <#isUpper,Rune>`_ ## * `isTitle proc <#isTitle,Rune>`_ var c = RuneImpl(c) # Note: toUpperRanges is correct here! var p = binarySearch(c, toUpperRanges, len(toUpperRanges) div 3, 3) if p >= 0 and c >= toUpperRanges[p] and c <= toUpperRanges[p+1]: return true p = binarySearch(c, toUpperSinglets, len(toUpperSinglets) div 2, 2) if p >= 0 and c == toUpperSinglets[p]: return true proc isUpper*(c: Rune): bool {.rtl, extern: "nuc$1".} = ## Returns true if ``c`` is a upper case rune. ## ## If possible, prefer ``isLower`` over ``isUpper``. ## ## See also: ## * `toUpper proc <#toUpper,Rune>`_ ## * `isLower proc <#isLower,Rune>`_ ## * `isTitle proc <#isTitle,Rune>`_ ## * `isAlpha proc <#isAlpha,Rune>`_ ## * `isWhiteSpace proc <#isWhiteSpace,Rune>`_ var c = RuneImpl(c) # Note: toLowerRanges is correct here! var p = binarySearch(c, toLowerRanges, len(toLowerRanges) div 3, 3) if p >= 0 and c >= toLowerRanges[p] and c <= toLowerRanges[p+1]: return true p = binarySearch(c, toLowerSinglets, len(toLowerSinglets) div 2, 2) if p >= 0 and c == toLowerSinglets[p]: return true proc isAlpha*(c: Rune): bool {.rtl, extern: "nuc$1".} = ## Returns true if ``c`` is an *alpha* rune (i.e., a letter). 
## ## See also: ## * `isLower proc <#isLower,Rune>`_ ## * `isTitle proc <#isTitle,Rune>`_ ## * `isAlpha proc <#isAlpha,Rune>`_ ## * `isWhiteSpace proc <#isWhiteSpace,Rune>`_ ## * `isCombining proc <#isCombining,Rune>`_ if isUpper(c) or isLower(c): return true var c = RuneImpl(c) var p = binarySearch(c, alphaRanges, len(alphaRanges) div 2, 2) if p >= 0 and c >= alphaRanges[p] and c <= alphaRanges[p+1]: return true p = binarySearch(c, alphaSinglets, len(alphaSinglets), 1) if p >= 0 and c == alphaSinglets[p]: return true proc isTitle*(c: Rune): bool {.rtl, extern: "nuc$1".} = ## Returns true if ``c`` is a Unicode titlecase code point. ## ## See also: ## * `toTitle proc <#toTitle,Rune>`_ ## * `isLower proc <#isLower,Rune>`_ ## * `isUpper proc <#isUpper,Rune>`_ ## * `isAlpha proc <#isAlpha,Rune>`_ ## * `isWhiteSpace proc <#isWhiteSpace,Rune>`_ return isUpper(c) and isLower(c) proc isWhiteSpace*(c: Rune): bool {.rtl, extern: "nuc$1".} = ## Returns true if ``c`` is a Unicode whitespace code point. ## ## See also: ## * `isLower proc <#isLower,Rune>`_ ## * `isUpper proc <#isUpper,Rune>`_ ## * `isTitle proc <#isTitle,Rune>`_ ## * `isAlpha proc <#isAlpha,Rune>`_ var c = RuneImpl(c) var p = binarySearch(c, spaceRanges, len(spaceRanges) div 2, 2) if p >= 0 and c >= spaceRanges[p] and c <= spaceRanges[p+1]: return true proc isCombining*(c: Rune): bool {.rtl, extern: "nuc$1".} = ## Returns true if ``c`` is a Unicode combining code unit. ## ## See also: ## * `isLower proc <#isLower,Rune>`_ ## * `isUpper proc <#isUpper,Rune>`_ ## * `isTitle proc <#isTitle,Rune>`_ ## * `isAlpha proc <#isAlpha,Rune>`_ var c = RuneImpl(c) # Optimized to return false immediately for ASCII return c >= 0x0300 and (c <= 0x036f or (c >= 0x1ab0 and c <= 0x1aff) or (c >= 0x1dc0 and c <= 0x1dff) or (c >= 0x20d0 and c <= 0x20ff) or (c >= 0xfe20 and c <= 0xfe2f)) template runeCheck(s, runeProc) = ## Common code for isAlpha and isSpace. 
result = if len(s) == 0: false else: true var i = 0 rune: Rune while i < len(s) and result: fastRuneAt(s, i, rune, doInc = true) result = runeProc(rune) and result proc isAlpha*(s: string): bool {.noSideEffect, rtl, extern: "nuc$1Str".} = ## Returns true if ``s`` contains all alphabetic runes. runnableExamples: let a = "añyóng" doAssert a.isAlpha runeCheck(s, isAlpha) proc isSpace*(s: string): bool {.noSideEffect, rtl, extern: "nuc$1Str".} = ## Returns true if ``s`` contains all whitespace runes. runnableExamples: let a = "\t\l \v\r\f" doAssert a.isSpace runeCheck(s, isWhiteSpace) template convertRune(s, runeProc) = ## Convert runes in ``s`` using ``runeProc`` as the converter. result = newString(len(s)) var i = 0 resultIndex = 0 rune: Rune while i < len(s): fastRuneAt(s, i, rune, doInc = true) rune = runeProc(rune) fastToUTF8Copy(rune, result, resultIndex, doInc = true) proc toUpper*(s: string): string {.noSideEffect, rtl, extern: "nuc$1Str".} = ## Converts ``s`` into upper-case runes. runnableExamples: doAssert toUpper("abγ") == "ABΓ" convertRune(s, toUpper) proc toLower*(s: string): string {.noSideEffect, rtl, extern: "nuc$1Str".} = ## Converts ``s`` into lower-case runes. runnableExamples: doAssert toLower("ABΓ") == "abγ" convertRune(s, toLower) proc swapCase*(s: string): string {.noSideEffect, rtl, extern: "nuc$1".} = ## Swaps the case of runes in ``s``. ## ## Returns a new string such that the cases of all runes ## are swapped if possible. runnableExamples: doAssert swapCase("Αlpha Βeta Γamma") == "αLPHA βETA γAMMA" var i = 0 resultIndex = 0 rune: Rune result = newString(len(s)) while i < len(s): fastRuneAt(s, i, rune) if rune.isUpper(): rune = rune.toLower() elif rune.isLower(): rune = rune.toUpper() fastToUTF8Copy(rune, result, resultIndex, doInc = true) proc capitalize*(s: string): string {.noSideEffect, rtl, extern: "nuc$1".} = ## Converts the first character of ``s`` into an upper-case rune. 
runnableExamples: doAssert capitalize("βeta") == "Βeta" if len(s) == 0: return "" var rune: Rune i = 0 fastRuneAt(s, i, rune, doInc = true) result = $toUpper(rune) & substr(s, i) proc translate*(s: string, replacements: proc(key: string): string): string {. rtl, extern: "nuc$1".} = ## Translates words in a string using the ``replacements`` proc to substitute ## words inside ``s`` with their replacements. ## ## ``replacements`` is any proc that takes a word and returns ## a new word to fill it's place. runnableExamples: proc wordToNumber(s: string): string = case s of "one": "1" of "two": "2" else: s let a = "one two three four" doAssert a.translate(wordToNumber) == "1 2 three four" # Allocate memory for the new string based on the old one. # If the new string length is less than the old, no allocations # will be needed. If the new string length is greater than the # old, then maybe only one allocation is needed result = newStringOfCap(s.len) var index = 0 lastIndex = 0 wordStart = 0 inWord = false rune: Rune while index < len(s): lastIndex = index fastRuneAt(s, index, rune) let whiteSpace = rune.isWhiteSpace() if whiteSpace and inWord: # If we've reached the end of a word let word = s[wordStart ..< lastIndex] result.add(replacements(word)) result.add($rune) inWord = false elif not whiteSpace and not inWord: # If we've hit a non space character and # are not currently in a word, track # the starting index of the word inWord = true wordStart = lastIndex elif whiteSpace: result.add($rune) if wordStart < len(s) and inWord: # Get the trailing word at the end let word = s[wordStart .. ^1] result.add(replacements(word)) proc title*(s: string): string {.noSideEffect, rtl, extern: "nuc$1".} = ## Converts ``s`` to a unicode title. ## ## Returns a new string such that the first character ## in each word inside ``s`` is capitalized. 
runnableExamples: doAssert title("αlpha βeta γamma") == "Αlpha Βeta Γamma" var i = 0 resultIndex = 0 rune: Rune result = newString(len(s)) var firstRune = true while i < len(s): fastRuneAt(s, i, rune) if not rune.isWhiteSpace() and firstRune: rune = rune.toUpper() firstRune = false elif rune.isWhiteSpace(): firstRune = true fastToUTF8Copy(rune, result, resultIndex, doInc = true) iterator runes*(s: string): Rune = ## Iterates over any rune of the string ``s`` returning runes. var i = 0 result: Rune while i < len(s): fastRuneAt(s, i, result, true) yield result iterator utf8*(s: string): string = ## Iterates over any rune of the string ``s`` returning utf8 values. ## ## See also: ## * `validateUtf8 proc <#validateUtf8,string>`_ ## * `toUTF8 proc <#toUTF8,Rune>`_ ## * `$ proc <#$,Rune>`_ alias for `toUTF8` ## * `fastToUTF8Copy template <#fastToUTF8Copy.t,Rune,string,int>`_ var o = 0 while o < s.len: let n = runeLenAt(s, o) yield s[o .. (o+n-1)] o += n proc toRunes*(s: string): seq[Rune] = ## Obtains a sequence containing the Runes in ``s``. ## ## See also: ## * `$ proc <#$,seq[T][Rune]>`_ for a reverse operation runnableExamples: let a = toRunes("aáä") doAssert a == @["a".runeAt(0), "á".runeAt(0), "ä".runeAt(0)] result = newSeq[Rune]() for r in s.runes: result.add(r) proc cmpRunesIgnoreCase*(a, b: string): int {.rtl, extern: "nuc$1".} = ## Compares two UTF-8 strings and ignores the case. Returns: ## ## | 0 if a == b ## | < 0 if a < b ## | > 0 if a > b var i = 0 var j = 0 var ar, br: Rune while i < a.len and j < b.len: # slow path: fastRuneAt(a, i, ar) fastRuneAt(b, j, br) result = RuneImpl(toLower(ar)) - RuneImpl(toLower(br)) if result != 0: return result = a.len - b.len proc reversed*(s: string): string = ## Returns the reverse of ``s``, interpreting it as runes. ## ## Unicode combining characters are correctly interpreted as well. 
runnableExamples: assert reversed("Reverse this!") == "!siht esreveR" assert reversed("先秦兩漢") == "漢兩秦先" assert reversed("as⃝df̅") == "f̅ds⃝a" assert reversed("a⃞b⃞c⃞") == "c⃞b⃞a⃞" var i = 0 lastI = 0 newPos = len(s) - 1 blockPos = 0 r: Rune template reverseUntil(pos) = var j = pos - 1 while j > blockPos: result[newPos] = s[j] dec j dec newPos blockPos = pos - 1 result = newString(len(s)) while i < len(s): lastI = i fastRuneAt(s, i, r, true) if not isCombining(r): reverseUntil(lastI) reverseUntil(len(s)) proc graphemeLen*(s: string; i: Natural): Natural = ## The number of bytes belonging to byte index ``s[i]``, ## including following combining code unit. runnableExamples: let a = "añyóng" doAssert a.graphemeLen(1) == 2 ## ñ doAssert a.graphemeLen(2) == 1 doAssert a.graphemeLen(4) == 2 ## ó var j = i.int var r, r2: Rune if j < s.len: fastRuneAt(s, j, r, true) result = j-i while j < s.len: fastRuneAt(s, j, r2, true) if not isCombining(r2): break result = j-i proc lastRune*(s: string; last: int): (Rune, int) = ## Length of the last rune in ``s[0..last]``. Returns the rune and its length ## in bytes. if s[last] <= chr(127): result = (Rune(s[last]), 1) else: var L = 0 while last-L >= 0 and uint(s[last-L]) shr 6 == 0b10: inc(L) var r: Rune fastRuneAt(s, last-L, r, false) result = (r, L+1) proc size*(r: Rune): int {.noSideEffect.} = ## Returns the number of bytes the rune ``r`` takes. 
runnableExamples: let a = toRunes "aá" doAssert size(a[0]) == 1 doAssert size(a[1]) == 2 let v = r.uint32 if v <= 0x007F'u32: result = 1 elif v <= 0x07FF'u32: result = 2 elif v <= 0xFFFF'u32: result = 3 elif v <= 0x1FFFFF'u32: result = 4 elif v <= 0x3FFFFFF'u32: result = 5 elif v <= 0x7FFFFFFF'u32: result = 6 else: result = 1 # --------- Private templates for different split separators ----------- proc stringHasSep(s: string, index: int, seps: openArray[Rune]): bool = var rune: Rune fastRuneAt(s, index, rune, false) return seps.contains(rune) proc stringHasSep(s: string, index: int, sep: Rune): bool = var rune: Rune fastRuneAt(s, index, rune, false) return sep == rune template splitCommon(s, sep, maxsplit: untyped) = ## Common code for split procedures. let sLen = len(s) var last = 0 splits = maxsplit if sLen > 0: while last <= sLen: var first = last while last < sLen and not stringHasSep(s, last, sep): inc(last, runeLenAt(s, last)) if splits == 0: last = sLen yield s[first .. (last - 1)] if splits == 0: break dec(splits) inc(last, if last < sLen: runeLenAt(s, last) else: 1) iterator split*(s: string, seps: openArray[Rune] = unicodeSpaces, maxsplit: int = -1): string = ## Splits the unicode string ``s`` into substrings using a group of separators. ## ## Substrings are separated by a substring containing only ``seps``. ## ## .. code-block:: nim ## for word in split("this\lis an\texample"): ## writeLine(stdout, word) ## ## ...generates this output: ## ## .. code-block:: ## "this" ## "is" ## "an" ## "example" ## ## And the following code: ## ## .. code-block:: nim ## for word in split("this:is;an$example", {';', ':', '$'}): ## writeLine(stdout, word) ## ## ...produces the same output as the first example. The code: ## ## .. code-block:: nim ## let date = "2012-11-20T22:08:08.398990" ## let separators = {' ', '-', ':', 'T'} ## for number in split(date, separators): ## writeLine(stdout, number) ## ## ...results in: ## ## .. 
code-block:: ## "2012" ## "11" ## "20" ## "22" ## "08" ## "08.398990" ## splitCommon(s, seps, maxsplit) iterator splitWhitespace*(s: string): string = ## Splits a unicode string at whitespace runes. splitCommon(s, unicodeSpaces, -1) template accResult(iter: untyped) = result = @[] for x in iter: add(result, x) proc splitWhitespace*(s: string): seq[string] {.noSideEffect, rtl, extern: "ncuSplitWhitespace".} = ## The same as the `splitWhitespace <#splitWhitespace.i,string>`_ ## iterator, but is a proc that returns a sequence of substrings. accResult(splitWhitespace(s)) iterator split*(s: string, sep: Rune, maxsplit: int = -1): string = ## Splits the unicode string ``s`` into substrings using a single separator. ## ## Substrings are separated by the rune ``sep``. ## The code: ## ## .. code-block:: nim ## for word in split(";;this;is;an;;example;;;", ';'): ## writeLine(stdout, word) ## ## Results in: ## ## .. code-block:: ## "" ## "" ## "this" ## "is" ## "an" ## "" ## "example" ## "" ## "" ## "" ## splitCommon(s, sep, maxsplit) proc split*(s: string, seps: openArray[Rune] = unicodeSpaces, maxsplit: int = -1): seq[string] {.noSideEffect, rtl, extern: "nucSplitRunes".} = ## The same as the `split iterator <#split.i,string,openArray[Rune],int>`_, ## but is a proc that returns a sequence of substrings. accResult(split(s, seps, maxsplit)) proc split*(s: string, sep: Rune, maxsplit: int = -1): seq[string] {.noSideEffect, rtl, extern: "nucSplitRune".} = ## The same as the `split iterator <#split.i,string,Rune,int>`_, but is a proc ## that returns a sequence of substrings. accResult(split(s, sep, maxsplit)) proc strip*(s: string, leading = true, trailing = true, runes: openArray[Rune] = unicodeSpaces): string {.noSideEffect, rtl, extern: "nucStrip".} = ## Strips leading or trailing ``runes`` from ``s`` and returns ## the resulting string. ## ## If ``leading`` is true (default), leading ``runes`` are stripped. 
## If ``trailing`` is true (default), trailing ``runes`` are stripped. ## If both are false, the string is returned unchanged. runnableExamples: let a = "\táñyóng " doAssert a.strip == "áñyóng" doAssert a.strip(leading = false) == "\táñyóng" doAssert a.strip(trailing = false) == "áñyóng " var sI = 0 ## starting index into string ``s`` eI = len(s) - 1 ## ending index into ``s``, where the last ``Rune`` starts if leading: var i = 0 xI: int ## value of ``sI`` at the beginning of the iteration rune: Rune while i < len(s): xI = i fastRuneAt(s, i, rune) sI = i # Assume to start from next rune if not runes.contains(rune): sI = xI # Go back to where the current rune starts break if trailing: var i = eI xI: int rune: Rune while i >= 0: xI = i fastRuneAt(s, xI, rune) var yI = i - 1 while yI >= 0: var yIend = yI pRune: Rune fastRuneAt(s, yIend, pRune) if yIend < xI: break i = yI rune = pRune dec(yI) if not runes.contains(rune): eI = xI - 1 break dec(i) let newLen = eI - sI + 1 result = newStringOfCap(newLen) if newLen > 0: result.add s[sI .. eI] proc repeat*(c: Rune, count: Natural): string {.noSideEffect, rtl, extern: "nucRepeatRune".} = ## Returns a string of ``count`` Runes ``c``. ## ## The returned string will have a rune-length of ``count``. runnableExamples: let a = "ñ".runeAt(0) doAssert a.repeat(5) == "ñññññ" let s = $c result = newStringOfCap(count * s.len) for i in 0 ..< count: result.add s proc align*(s: string, count: Natural, padding = ' '.Rune): string {. noSideEffect, rtl, extern: "nucAlignString".} = ## Aligns a unicode string ``s`` with ``padding``, so that it has a rune-length ## of ``count``. ## ## ``padding`` characters (by default spaces) are added before ``s`` resulting in ## right alignment. If ``s.runelen >= count``, no spaces are added and ``s`` is ## returned unchanged. If you need to left align a string use the `alignLeft ## proc <#alignLeft,string,Natural>`_. 
runnableExamples: assert align("abc", 4) == " abc" assert align("a", 0) == "a" assert align("1232", 6) == " 1232" assert align("1232", 6, '#'.Rune) == "##1232" assert align("Åge", 5) == " Åge" assert align("×", 4, '_'.Rune) == "___×" let sLen = s.runeLen if sLen < count: let padStr = $padding result = newStringOfCap(padStr.len * count) let spaces = count - sLen for i in 0 ..< spaces: result.add padStr result.add s else: result = s proc alignLeft*(s: string, count: Natural, padding = ' '.Rune): string {. noSideEffect.} = ## Left-aligns a unicode string ``s`` with ``padding``, so that it has a ## rune-length of ``count``. ## ## ``padding`` characters (by default spaces) are added after ``s`` resulting in ## left alignment. If ``s.runelen >= count``, no spaces are added and ``s`` is ## returned unchanged. If you need to right align a string use the `align ## proc <#align,string,Natural>`_. runnableExamples: assert alignLeft("abc", 4) == "abc " assert alignLeft("a", 0) == "a" assert alignLeft("1232", 6) == "1232 " assert alignLeft("1232", 6, '#'.Rune) == "1232##" assert alignLeft("Åge", 5) == "Åge " assert alignLeft("×", 4, '_'.Rune) == "×___" let sLen = s.runeLen if sLen < count: let padStr = $padding result = newStringOfCap(s.len + (count - sLen) * padStr.len) result.add s for i in sLen ..< count: result.add padStr else: result = s # ----------------------------------------------------------------------------- # deprecated template runeCaseCheck(s, runeProc, skipNonAlpha) = ## Common code for rune.isLower and rune.isUpper. 
template runeCaseCheck(s, runeProc, skipNonAlpha) =
  ## Shared body of the string ``isLower`` / ``isUpper`` procs.
  ##
  ## The template expands inside the calling proc, so each ``return`` below
  ## leaves that proc. ``runeProc`` is the per-rune predicate to apply.
  if len(s) == 0: return false

  var
    pos = 0
    current: Rune
    sawAlpha = false

  while pos < len(s):
    # Decodes the rune at ``pos`` and advances ``pos`` past it.
    fastRuneAt(s, pos, current, doInc = true)
    if skipNonAlpha:
      let alpha = isAlpha(current)
      sawAlpha = sawAlpha or alpha
      # Only alphabetical runes are held to the predicate in this mode.
      if alpha and (not runeProc(current)):
        return false
    elif not runeProc(current):
      return false

  # When skipping non-alpha runes, at least one alpha rune must have been seen.
  return (not skipNonAlpha) or sawAlpha

proc isLower*(s: string, skipNonAlpha: bool): bool {.
  deprecated: "Deprecated since version 0.20 since its semantics are unclear".} =
  ## **Deprecated since version 0.20 since its semantics are unclear**
  ##
  ## Tests whether ``s`` is lower case.
  ##
  ## With ``skipNonAlpha == true`` only alphabetical runes are examined: the
  ## result is true when all of them are lower case, and false when ``s``
  ## contains no alphabetical rune at all.
  ##
  ## With ``skipNonAlpha == false`` every rune of ``s`` must be lower case,
  ## so any non-alphabetical rune makes the result false.
  ##
  ## An empty ``s`` always yields false.
  runeCaseCheck(s, isLower, skipNonAlpha)

proc isUpper*(s: string, skipNonAlpha: bool): bool {.
  deprecated: "Deprecated since version 0.20 since its semantics are unclear".} =
  ## **Deprecated since version 0.20 since its semantics are unclear**
  ##
  ## Tests whether ``s`` is upper case.
  ##
  ## With ``skipNonAlpha == true`` only alphabetical runes are examined: the
  ## result is true when all of them are upper case, and false when ``s``
  ## contains no alphabetical rune at all.
  ##
  ## With ``skipNonAlpha == false`` every rune of ``s`` must be upper case,
  ## so any non-alphabetical rune makes the result false.
  ##
  ## An empty ``s`` always yields false.
  runeCaseCheck(s, isUpper, skipNonAlpha)
proc isTitle*(s: string): bool {.noSideEffect, rtl, extern: "nuc$1Str",
    deprecated: "Deprecated since version 0.20 since its semantics are unclear".} =
  ## **Deprecated since version 0.20 since its semantics are unclear**
  ##
  ## Checks whether or not ``s`` is a unicode title.
  ##
  ## Returns true if the first rune of each whitespace-separated word in ``s``
  ## is upper case and ``s`` is not empty.
  if s.len == 0:
    return false
  result = true
  var
    i = 0
    rune: Rune
    firstRune = true # true while the next non-whitespace rune starts a word

  # The loop stops at the first word whose initial rune is not upper case.
  while i < len(s) and result:
    fastRuneAt(s, i, rune, doInc = true)
    if not rune.isWhiteSpace() and firstRune:
      result = rune.isUpper() and result
      firstRune = false
    elif rune.isWhiteSpace():
      firstRune = true

when isMainModule:

  proc asRune(s: static[string]): Rune =
    ## Compile-time conversion proc for converting string literals to a Rune
    ## value. Returns the first Rune of the specified string.
    ##
    ## Shortcuts code like ``"å".runeAt(0)`` to ``"å".asRune`` and returns a
    ## compile-time constant.
    if s.len == 0: Rune(0)
    else: s.runeAt(0)

  let
    someString = "öÑ"
    someRunes = toRunes(someString)
    compared = (someString == $someRunes)
  doAssert compared == true

  proc testReplacements(word: string): string =
    case word
    of "two":
      return "2"
    of "foo":
      return "BAR"
    of "βeta":
      return "beta"
    of "alpha":
      return "αlpha"
    else:
      return "12345"

  doAssert translate("two not alpha foo βeta", testReplacements) == "2 12345 αlpha BAR beta"
  doAssert translate(" two not foo βeta ", testReplacements) == " 2 12345 BAR beta "

  doAssert title("foo bar") == "Foo Bar"
  doAssert title("αlpha βeta γamma") == "Αlpha Βeta Γamma"
  doAssert title("") == ""

  doAssert capitalize("βeta") == "Βeta"
  doAssert capitalize("foo") == "Foo"
  doAssert capitalize("") == ""

  doAssert swapCase("FooBar") == "fOObAR"
  doAssert swapCase(" ") == " "
  doAssert swapCase("Αlpha Βeta Γamma") == "αLPHA βETA γAMMA"
  doAssert swapCase("a✓B") == "A✓b"
  doAssert swapCase("Јамогујестистаклоитоминештети") == "јАМОГУЈЕСТИСТАКЛОИТОМИНЕШТЕТИ"
  doAssert swapCase("ὕαλονϕαγεῖνδύναμαιτοῦτοοὔμεβλάπτει") == "ὝΑΛΟΝΦΑΓΕῖΝΔΎΝΑΜΑΙΤΟῦΤΟΟὔΜΕΒΛΆΠΤΕΙ"
  doAssert swapCase("Կրնամապակիուտեևինծիանհանգիստչըներ") == "կՐՆԱՄԱՊԱԿԻՈՒՏԵևԻՆԾԻԱՆՀԱՆԳԻՍՏՉԸՆԵՐ"
  doAssert swapCase("") == ""

  doAssert isAlpha("r")
  doAssert isAlpha("α")
  doAssert isAlpha("ϙ")
  doAssert isAlpha("ஶ")
  doAssert(not isAlpha("$"))
  doAssert(not isAlpha(""))

  doAssert isAlpha("Βeta")
  doAssert isAlpha("Args")
  doAssert isAlpha("𐌼𐌰𐌲𐌲𐌻𐌴𐍃𐍄𐌰𐌽")
  doAssert isAlpha("ὕαλονϕαγεῖνδύναμαιτοῦτοοὔμεβλάπτει")
  doAssert isAlpha("Јамогујестистаклоитоминештети")
  doAssert isAlpha("Կրնամապակիուտեևինծիանհանգիստչըներ")
  doAssert(not isAlpha("$Foo✓"))
  doAssert(not isAlpha("⠙⠕⠑⠎⠝⠞"))

  doAssert isSpace("\t")
  doAssert isSpace("\l")
  doAssert(not isSpace("Β"))
  doAssert(not isSpace("Βeta"))

  doAssert isSpace("\t\l \v\r\f")
  doAssert isSpace(" ")
  doAssert(not isSpace(""))
  doAssert(not isSpace("ΑΓc \td"))

  doAssert(not isLower(' '.Rune))

  doAssert(not isUpper(' '.Rune))

  doAssert toUpper("Γ") == "Γ"
  doAssert toUpper("b") == "B"
  doAssert toUpper("α") == "Α"
  doAssert toUpper("✓") == "✓"
  doAssert toUpper("ϙ") == "Ϙ"
  doAssert toUpper("") == ""

  doAssert toUpper("ΑΒΓ") == "ΑΒΓ"
  doAssert toUpper("AAccβ") == "AACCΒ"
  doAssert toUpper("A✓$β") == "A✓$Β"

  doAssert toLower("a") == "a"
  doAssert toLower("γ") == "γ"
  doAssert toLower("Γ") == "γ"
  doAssert toLower("4") == "4"
  doAssert toLower("Ϙ") == "ϙ"
  doAssert toLower("") == ""

  doAssert toLower("abcdγ") == "abcdγ"
  doAssert toLower("abCDΓ") == "abcdγ"
  doAssert toLower("33aaΓ") == "33aaγ"

  doAssert reversed("Reverse this!") == "!siht esreveR"
  doAssert reversed("先秦兩漢") == "漢兩秦先"
  doAssert reversed("as⃝df̅") == "f̅ds⃝a"
  doAssert reversed("a⃞b⃞c⃞") == "c⃞b⃞a⃞"
  doAssert reversed("ὕαλονϕαγεῖνδύναμαιτοῦτοοὔμεβλάπτει") == "ιετπάλβεμὔοοτῦοτιαμανύδνῖεγαϕνολαὕ"
  doAssert reversed("Јамогујестистаклоитоминештети") == "итетшенимотиолкатситсејугомаЈ"
  doAssert reversed("Կրնամապակիուտեևինծիանհանգիստչըներ") == "րենըչտսիգնահնաիծնիևետւոիկապամանրԿ"
  doAssert len(toRunes("as⃝df̅")) == runeLen("as⃝df̅")
  const test = "as⃝"
  doAssert lastRune(test, test.len-1)[1] == 3
  doAssert graphemeLen("è", 0) == 2

  # test for rune positioning and runeSubStr()
  # Note the two spaces after "Hänsel": the assertions below depend on the
  # string having 18 runes, with ':' at rune index 10 and '€' starting at
  # byte offset 20.
  let s = "Hänsel  ««: 10,00€"

  var t = ""
  for c in s.utf8:
    t.add c

  doAssert(s == t)

  doAssert(runeReverseOffset(s, 1) == (20, 18))
  doAssert(runeReverseOffset(s, 19) == (-1, 18))

  doAssert(runeStrAtPos(s, 0) == "H")
  doAssert(runeSubStr(s, 0, 1) == "H")
  doAssert(runeStrAtPos(s, 10) == ":")
  doAssert(runeSubStr(s, 10, 1) == ":")
  doAssert(runeStrAtPos(s, 9) == "«")
  doAssert(runeSubStr(s, 9, 1) == "«")
  doAssert(runeStrAtPos(s, 17) == "€")
  doAssert(runeSubStr(s, 17, 1) == "€")
  # echo runeStrAtPos(s, 18) # index error

  doAssert(runeSubStr(s, 0) == "Hänsel  ««: 10,00€")
  doAssert(runeSubStr(s, -18) == "Hänsel  ««: 10,00€")
  doAssert(runeSubStr(s, 10) == ": 10,00€")
  doAssert(runeSubStr(s, 18) == "")
  doAssert(runeSubStr(s, 0, 10) == "Hänsel  ««")

  doAssert(runeSubStr(s, 12) == "10,00€")
  doAssert(runeSubStr(s, -6) == "10,00€")

  doAssert(runeSubStr(s, 12, 5) == "10,00")
  doAssert(runeSubStr(s, 12, -1) == "10,00")
  doAssert(runeSubStr(s, -6, 5) == "10,00")
  doAssert(runeSubStr(s, -6, -1) == "10,00")

  doAssert(runeSubStr(s, 0, 100) == "Hänsel  ««: 10,00€")
  doAssert(runeSubStr(s, -100, 100) == "Hänsel  ««: 10,00€")
  doAssert(runeSubStr(s, 0, -100) == "")
  doAssert(runeSubStr(s, 100, -100) == "")

  block splitTests:
    # ``s`` ends in two spaces; that is what produces the two trailing empty
    # fields expected from ``split()`` below.
    let s = " this is an example  "
    let s2 = ":this;is;an:example;;"
    let s3 = ":this×is×an:example××"
    doAssert s.split() == @["", "this", "is", "an", "example", "", ""]
    doAssert s2.split(seps = [':'.Rune, ';'.Rune]) == @["", "this", "is", "an",
        "example", "", ""]
    doAssert s3.split(seps = [':'.Rune, "×".asRune]) == @["", "this", "is",
        "an", "example", "", ""]
    doAssert s.split(maxsplit = 4) == @["", "this", "is", "an", "example  "]
    doAssert s.split(' '.Rune, maxsplit = 1) == @["", "this is an example  "]
    doAssert s3.split("×".runeAt(0)) == @[":this", "is", "an:example", "", ""]

  block stripTests:
    doAssert(strip("") == "")
    doAssert(strip(" ") == "")
    doAssert(strip("y") == "y")
    doAssert(strip(" foofoofoo ") == "foofoofoo")
    doAssert(strip("sfoofoofoos", runes = ['s'.Rune]) == "foofoofoo")

    block:
      let stripTestRunes = ['b'.Rune, 'a'.Rune, 'r'.Rune]
      doAssert(strip("barfoofoofoobar", runes = stripTestRunes) == "foofoofoo")
    doAssert(strip("sfoofoofoos", leading = false, runes = ['s'.Rune]) == "sfoofoofoo")
    doAssert(strip("sfoofoofoos", trailing = false, runes = ['s'.Rune]) == "foofoofoos")

    block:
      let stripTestRunes = ["«".asRune, "»".asRune]
      doAssert(strip("«TEXT»", runes = stripTestRunes) == "TEXT")
    doAssert(strip("copyright©", leading = false, runes = ["©".asRune]) == "copyright")
    doAssert(strip("¿Question?", trailing = false, runes = ["¿".asRune]) == "Question?")
    doAssert(strip("×text×", leading = false, runes = ["×".asRune]) == "×text")
    doAssert(strip("×text×", trailing = false, runes = ["×".asRune]) == "text×")

  block repeatTests:
    doAssert repeat('c'.Rune, 5) == "ccccc"
    doAssert repeat("×".asRune, 5) == "×××××"

  block alignTests:
    # Padding a 4-rune string to width 6 adds exactly two padding runes.
    doAssert align("abc", 4) == " abc"
    doAssert align("a", 0) == "a"
    doAssert align("1232", 6) == "  1232"
    doAssert align("1232", 6, '#'.Rune) == "##1232"
    doAssert align("1232", 6, "×".asRune) == "××1232"
    doAssert alignLeft("abc", 4) == "abc "
    doAssert alignLeft("a", 0) == "a"
    doAssert alignLeft("1232", 6) == "1232  "
    doAssert alignLeft("1232", 6, '#'.Rune) == "1232##"
    doAssert alignLeft("1232", 6, "×".asRune) == "1232××"

  block differentSizes:
    # upper and lower variants have different number of bytes
    doAssert toLower("AẞC") == "aßc"
    doAssert toLower("ȺẞCD") == "ⱥßcd"
    doAssert toUpper("ⱥbc") == "ȺBC"
    doAssert toUpper("rsⱦuv") == "RSȾUV"
    doAssert swapCase("ⱥbCd") == "ȺBcD"
    doAssert swapCase("XyꟆaB") == "xYᶎAb"
    doAssert swapCase("aᵹcᲈd") == "AꝽCꙊD"