#
#
# Nimrod's Runtime Library
# (c) Copyright 2012 Andreas Rumpf
#
# See the file "copying.txt", included in this
# distribution, for details about the copyright.
#
## Nimrod support for C/C++'s `wide strings`:idx:. This is part of the system
## module! Do not import it directly!
type
  # One UTF-16 code unit, stored as a signed 16 bit integer but kept
  # distinct from `int16`.
  TUtf16Char* = distinct int16

  # Pointer to a sequence of UTF-16 code units. The routines in this
  # module treat a zero unit as the end of the string.
  WideCString* = ptr array[0..1_000_000, TUtf16Char]
proc len*(w: WideCString): int =
  ## Number of UTF-16 code units in `w` before the terminating zero.
  ## The string is scanned from the start on every call.
  var units = 0
  while w[units].int16 != 0'i16:
    inc units
  result = units
when true:
const
  # Written in place of code points that cannot be encoded.
  UNI_REPLACEMENT_CHAR = TUtf16Char(0xFFFD'i16)

  # Upper bounds of the code point ranges. UNI_MAX_BMP is the largest value
  # stored as a single UTF-16 unit, UNI_MAX_UTF16 the largest value stored
  # as a surrogate pair. The two UTF32 bounds are not referenced in this
  # part of the file.
  UNI_MAX_BMP = 0xFFFF
  UNI_MAX_UTF16 = 0x10FFFF
  UNI_MAX_UTF32 = 0x7FFFFFFF
  UNI_MAX_LEGAL_UTF32 = 0x10FFFF

  # Boundaries of the high and low surrogate ranges.
  UNI_SUR_HIGH_START = 0xD800
  UNI_SUR_HIGH_END = 0xDBFF
  UNI_SUR_LOW_START = 0xDC00
  UNI_SUR_LOW_END = 0xDFFF

  # Arithmetic for splitting a code point into a surrogate pair and back:
  # each surrogate carries 10 bits of the code point minus `halfBase`.
  halfShift = 10
  halfBase = 0x10000
  halfMask = 0x3FF
# Bit mask with the `n` lowest bits set, e.g. ``ones(6) == 0b11_1111``.
template ones(n: expr): expr =
  ((1 shl n) - 1)
template fastRuneAt(s: cstring, i: int, result: expr, doInc = true) =
  ## Decodes the UTF-8 sequence starting at ``s[i]`` and stores the code
  ## point in `result`. If ``doInc == true`` `i` is incremented by the
  ## number of bytes that have been processed.
  ##
  ## A lead byte whose continuation bytes are missing or malformed yields
  ## U+FFFD and consumes that single byte only. The continuation bytes are
  ## tested left to right with short-circuit ``and``; since the terminating
  ## zero is not a continuation byte, a sequence that is cut off by the end
  ## of the string is never read (or skipped) past the terminator.
  bind ones
  if ord(s[i]) <=% 127:
    # plain ASCII
    result = ord(s[i])
    when doInc: inc(i)
  elif ord(s[i]) shr 5 == 0b110 and ord(s[i+1]) shr 6 == 0b10:
    # two byte sequence: 5 + 6 payload bits
    result = (ord(s[i]) and (ones(5))) shl 6 or (ord(s[i+1]) and ones(6))
    when doInc: inc(i, 2)
  elif ord(s[i]) shr 4 == 0b1110 and ord(s[i+1]) shr 6 == 0b10 and
       ord(s[i+2]) shr 6 == 0b10:
    # three byte sequence: 4 + 6 + 6 payload bits
    result = (ord(s[i]) and ones(4)) shl 12 or
             (ord(s[i+1]) and ones(6)) shl 6 or
             (ord(s[i+2]) and ones(6))
    when doInc: inc(i, 3)
  elif ord(s[i]) shr 3 == 0b11110 and ord(s[i+1]) shr 6 == 0b10 and
       ord(s[i+2]) shr 6 == 0b10 and ord(s[i+3]) shr 6 == 0b10:
    # four byte sequence: 3 + 6 + 6 + 6 payload bits
    result = (ord(s[i]) and ones(3)) shl 18 or
             (ord(s[i+1]) and ones(6)) shl 12 or
             (ord(s[i+2]) and ones(6)) shl 6 or
             (ord(s[i+3]) and ones(6))
    when doInc: inc(i, 4)
  else:
    # stray continuation byte, invalid lead byte or truncated sequence
    result = 0xFFFD
    when doInc: inc(i)
iterator runes(s: cstring): int =
  ## Yields the code points of the zero-terminated UTF-8 string `s` one
  ## after another, decoding each with `fastRuneAt`.
  var pos = 0
  var rune: int
  while s[pos] != '\0':
    # fastRuneAt advances `pos` past the bytes it has decoded
    fastRuneAt(s, pos, rune, true)
    yield rune
proc allocWideCString*(source: cstring, L: int): WideCString =
  ## Converts the UTF-8 string `source` to a newly allocated, zero-terminated
  ## UTF-16 string. `L` only determines the size of the allocation
  ## (``L * 4 + 2`` bytes); the conversion itself runs until the terminating
  ## zero of `source`. Free after usage with `dealloc`.
  result = cast[WideCString](alloc(L * 4 + 2))
  var pos = 0
  for rune in runes(source):
    if rune >% UNI_MAX_UTF16:
      # beyond the range UTF-16 can express
      result[pos] = UNI_REPLACEMENT_CHAR
    elif rune >% UNI_MAX_BMP:
      # supplementary plane: store as high + low surrogate
      let offset = rune -% halfBase
      result[pos] = TUtf16Char(toU16((offset shr halfShift) +%
                                     UNI_SUR_HIGH_START))
      inc pos
      result[pos] = TUtf16Char(toU16((offset and halfMask) +%
                                     UNI_SUR_LOW_START))
    elif rune >=% UNI_SUR_HIGH_START and rune <=% UNI_SUR_LOW_END:
      # a surrogate value is not a valid code point on its own
      result[pos] = UNI_REPLACEMENT_CHAR
    else:
      result[pos] = TUtf16Char(toU16(rune))
    inc pos
  # terminating zero
  result[pos] = TUtf16Char(0'i16)
proc allocWideCString*(s: cstring): WideCString =
  ## Converts the zero-terminated UTF-8 string `s` to a newly allocated wide
  ## string; a nil `s` gives nil. Free after usage with `dealloc`.
  when not defined(c_strlen):
    proc c_strlen(a: cstring): int {.nodecl, noSideEffect, importc: "strlen".}
  if s.isNil:
    result = nil
  else:
    # the byte length is only needed to size the destination buffer
    result = allocWideCString(s, cstrlen(s))
proc allocWideCString*(s: string): WideCString =
  ## Converts the UTF-8 string `s` to a newly allocated wide string, sizing
  ## the buffer from ``s.len``. Free after usage with `dealloc`.
  return allocWideCString(s, s.len)
proc `$`*(w: WideCString, estimate: int): string =
  ## Converts the zero-terminated UTF-16 string `w` to UTF-8. `estimate` is
  ## only used to pre-size the result (``estimate + estimate shr 2`` bytes);
  ## the conversion always runs up to the terminating zero.
  ##
  ## A high surrogate followed by a low surrogate is combined into one code
  ## point. An unpaired surrogate is encoded as it is.
  result = newStringOfCap(estimate + estimate shr 2)
  var i = 0
  while w[i].int16 != 0'i16:
    # TUtf16Char is a signed 16 bit value: mask after widening so that
    # units >= 0x8000 (this includes every surrogate) do not become
    # negative and fail all the range checks below.
    var ch = int(w[i]) and 0xFFFF
    inc i
    if ch >=% UNI_SUR_HIGH_START and ch <=% UNI_SUR_HIGH_END:
      # w[i-1] was not zero, so w[i] is at worst the terminator and can
      # be read safely.
      let ch2 = int(w[i]) and 0xFFFF
      # If it's a low surrogate, convert to UTF32:
      if ch2 >=% UNI_SUR_LOW_START and ch2 <=% UNI_SUR_LOW_END:
        # the high surrogate holds the upper 10 bits, hence `shl`
        ch = ((ch -% UNI_SUR_HIGH_START) shl halfShift) +%
             (ch2 -% UNI_SUR_LOW_START) +% halfBase
        inc i
    if ch <=% 127:
      result.add chr(ch)
    elif ch <=% 0x07FF:
      result.add chr((ch shr 6) or 0b110_00000)
      result.add chr((ch and ones(6)) or 0b10_000000)
    elif ch <=% 0xFFFF:
      result.add chr(ch shr 12 or 0b1110_0000)
      result.add chr(ch shr 6 and ones(6) or 0b10_0000_00)
      result.add chr(ch and ones(6) or 0b10_0000_00)
    elif ch <=% 0x0010FFFF:
      result.add chr(ch shr 18 or 0b1111_0000)
      result.add chr(ch shr 12 and ones(6) or 0b10_0000_00)
      result.add chr(ch shr 6 and ones(6) or 0b10_0000_00)
      result.add chr(ch and ones(6) or 0b10_0000_00)
    else:
      # replacement char:
      result.add chr(0xFFFD shr 12 or 0b1110_0000)
      result.add chr(0xFFFD shr 6 and ones(6) or 0b10_0000_00)
      result.add chr(0xFFFD and ones(6) or 0b10_0000_00)
proc `$`*(s: WideCString): string =
  ## Converts the zero-terminated wide string `s` to UTF-8, using 80 as the
  ## size estimate for the result.
  return `$`(s, 80)
else:
# Value passed as `CodePage` to the conversion routines to select UTF-8.
const utf8Encoding = 65001
# Binding for `MultiByteToWideChar`, loaded from kernel32 with the stdcall
# calling convention. The callers in this file pass a byte count for
# `cbMultiByte`, a capacity in UTF-16 units for `cchWideChar`, and treat a
# return value of 0 as failure.
proc MultiByteToWideChar*(CodePage, dwFlags: int32,
                          lpMultiByteStr: cstring, cbMultiByte: cint,
                          lpWideCharStr: WideCString,
                          cchWideChar: cint): cint {.
    stdcall, dynlib: "kernel32", importc: "MultiByteToWideChar".}
# Binding for `WideCharToMultiByte`, loaded from kernel32 with the stdcall
# calling convention. The callers in this file pass a count of UTF-16 units
# for `cchWideChar`, a byte capacity for `cbMultiByte`, and treat a return
# value of 0 as failure. The two trailing parameters default to nil.
proc WideCharToMultiByte*(CodePage, dwFlags: int32,
                          lpWideCharStr: WideCString, cchWideChar: cint,
                          lpMultiByteStr: cstring, cbMultiByte: cint,
                          lpDefaultChar: cstring = nil,
                          lpUsedDefaultChar: pointer = nil): cint {.
    stdcall, dynlib: "kernel32", importc: "WideCharToMultiByte".}
proc raiseEncodingError() {.noinline, noreturn.} =
  ## Raises `EOS` to report a failed conversion. Kept out of line so that
  ## the callers only contain a plain call on their error path.
  var err = newException(EOS, "error in unicode conversion")
  raise err
proc `$`*(s: WideCString, len: int): string =
  ## Converts the first `len` UTF-16 units of `s` to a UTF-8 string with
  ## `WideCharToMultiByte`. Calls `raiseEncodingError` if the conversion
  ## still fails after the buffer has been resized.
  # An empty input is answered directly: the conversion routine returns 0
  # in case of error, so 0 cannot also mean "nothing to convert".
  if len == 0: return ""
  # first attempt with an educated guess of the capacity:
  var capacity = len + len shr 2
  result = newStringOfCap(capacity)
  let written = WideCharToMultiByte(utf8Encoding, 0'i32, s, cint(len),
                                    cstring(result), capacity)
  if written == 0:
    # the guess did not work out; query the required capacity ...
    capacity = WideCharToMultiByte(utf8Encoding, 0'i32, s, cint(len),
                                   nil, cint(0))
    # ... and convert again into a buffer of that size:
    result = newStringOfCap(capacity)
    let retried = WideCharToMultiByte(utf8Encoding, 0'i32, s, cint(len),
                                      cstring(result), capacity)
    if retried == 0: raiseEncodingError()
    setLen(result, retried)
  elif written <= capacity:
    setLen(result, written)
  else:
    sysAssert(false, "") # cannot happen
proc `$`*(s: WideCString): string =
  ## Converts the zero-terminated wide string `s` to UTF-8. The length of
  ## `s` is determined first with `len`.
  return `$`(s, s.len)
proc allocWideCString*(s: string): WideCString =
  ## Converts the UTF-8 string `s` to a newly allocated, zero-terminated
  ## UTF-16 string using `MultiByteToWideChar`. Free after usage with
  ## `dealloc`. Calls `raiseEncodingError` if the conversion fails; the
  ## buffer is released before raising.
  # one UTF-16 unit per input byte plus the terminator; alloc0 zeroes the
  # buffer, so the result is already terminated
  let cap = s.len+1
  result = cast[WideCString](alloc0(cap * 2))
  # special case: empty string: needed because MultiByteToWideChar
  # returns 0 in case of error:
  if s.len == 0: return
  # convert to utf-16 LE
  let m = MultiByteToWideChar(CodePage = utf8Encoding, dwFlags = 0'i32,
                              lpMultiByteStr = cstring(s),
                              cbMultiByte = cint(s.len),
                              lpWideCharStr = result,
                              cchWideChar = cint(cap))
  if m == 0:
    # the caller never receives the buffer on failure, so free it here
    dealloc(result)
    raiseEncodingError()
proc allocWideCString*(s: cstring): WideCString =
  ## Converts the zero-terminated UTF-8 string `s` to a newly allocated,
  ## zero-terminated UTF-16 string using `MultiByteToWideChar`; a nil `s`
  ## gives nil. Free after usage with `dealloc`. Calls `raiseEncodingError`
  ## if the conversion fails; the buffer is released before raising.
  if s.isNil: return nil
  when not defined(c_strlen):
    proc c_strlen(a: cstring): int {.nodecl, noSideEffect, importc: "strlen".}
  let len = cstrlen(s)
  # one UTF-16 unit per input byte plus the terminator; alloc0 zeroes the
  # buffer, so the result is already terminated
  let cap = len+1
  result = cast[WideCString](alloc0(cap * 2))
  # special case: empty string: needed because MultiByteToWideChar
  # returns 0 in case of error. The length is already known, so the
  # string is not scanned a second time:
  if len == 0: return
  # convert to utf-16 LE
  let m = MultiByteToWideChar(CodePage = utf8Encoding, dwFlags = 0'i32,
                              lpMultiByteStr = s,
                              cbMultiByte = cint(len),
                              lpWideCharStr = result,
                              cchWideChar = cint(cap))
  if m == 0:
    # the caller never receives the buffer on failure, so free it here
    dealloc(result)
    raiseEncodingError()