diff options
Diffstat (limited to 'src/strings/charset.nim')
-rw-r--r-- | src/strings/charset.nim | 69 |
1 files changed, 0 insertions, 69 deletions
# Reconstructed from the deleted-file diff of src/strings/charset.nim.
import unicode

# UTF-16 text stored as raw bytes (two bytes per code unit) in a string.
type string16* = distinct string

proc toUTF16*(s: string): string16 =
  ## Convert a UTF-8 string to UTF-16; each code unit is written low byte
  ## first (little-endian).
  ##
  ## Code points above U+FFFF become a surrogate pair; anything above
  ## U+10FFFF is replaced by U+FFFD.
  ## Note: this doesn't check for (invalid) UTF-8 containing surrogates.
  var res = newStringOfCap(s.len * 2)
  template put16(c: uint16) =
    # Bind the argument once so the expression is not evaluated twice.
    let unit: uint16 = c
    res.add char(unit and 0xFF'u16)
    res.add char(unit shr 8)
  for r in s.runes:
    var c = uint32(r)
    if c < 0x10000: # ucs-2
      put16 uint16(c)
    elif c <= 0x10FFFF: # surrogate pair
      c -= 0x10000
      put16 uint16((c shr 10) + 0xD800)
      put16 uint16((c and 0x3FF) + 0xDC00)
    else: # invalid
      put16 uint16(0xFFFD)
  result = string16(res)

proc len*(s: string16): int {.borrow.}
  ## Length in bytes (not in code units).

proc `[]`*(s: string16, i: int): char = string(s)[i]
  ## Byte at index `i`.

proc `[]`*(s: string16, i: BackwardsIndex): char = string(s)[i]
  ## Byte at backwards index `i` (e.g. `s[^1]`).

template fastRuneAt*(s: string16, i: int, r: untyped, doInc = true,
                     be = false) =
  ## Decode the code point starting at byte offset `i` of `s` into `r`.
  ##
  ## * `doInc`: when true, `i` is advanced past the consumed bytes
  ##   (1 for a dangling byte, 2 for a single unit, 4 for a pair).
  ## * `be`: when true, code units are read big-endian; otherwise
  ##   little-endian.
  ##
  ## A single trailing byte yields U+FFFD. Unpaired surrogates are passed
  ## through unchanged as their own code unit value.
  if i + 1 == s.len: # unmatched byte
    when doInc: inc i
    r = Rune(0xFFFD)
  else:
    when be:
      var c1: uint32 = (uint32(s[i]) shl 8) + uint32(s[i + 1])
    else:
      var c1: uint32 = uint32(s[i]) + (uint32(s[i + 1]) shl 8)
    # High (leading) surrogates are 0xD800..0xDBFF. Both bounds must hold:
    # with `or` this test is always true and every unit would be treated
    # as the first half of a pair.
    if c1 >= 0xD800 and c1 < 0xDC00:
      if i + 4 > s.len:
        # Fewer than two bytes remain for the second unit.
        when doInc: i += 2
        r = Rune(c1) # unmatched surrogate
      else:
        when be:
          var c2: uint32 = (uint32(s[i + 2]) shl 8) + uint32(s[i + 3])
        else:
          var c2: uint32 = uint32(s[i + 2]) + (uint32(s[i + 3]) shl 8)
        if c2 >= 0xDC00 and c2 < 0xE000:
          r = Rune((((c1 and 0x3FF) shl 10) or (c2 and 0x3FF)) + 0x10000)
          when doInc: i += 4
        else:
          r = Rune(c1) # unmatched surrogate
          when doInc: i += 2
    else:
      r = Rune(c1) # ucs-2
      when doInc: i += 2

iterator runes*(s: string16): Rune =
  ## Yield each code point of `s`, reading code units little-endian.
  var i = 0
  var r: Rune
  while i < s.len:
    fastRuneAt(s, i, r)
    yield r

proc fromUTF16*(s: string16): string =
  ## Convert little-endian UTF-16 to a UTF-8 string.
  for r in s.runes:
    result &= r