diff options
author | Bung <crc32@qq.com> | 2022-12-01 20:34:00 +0800 |
---|---|---|
committer | GitHub <noreply@github.com> | 2022-12-01 13:34:00 +0100 |
commit | 658b28dc5707601b39d9aad4b6bf79a9afff1e92 (patch) | |
tree | b8e10d4d1f2666c45c3b1dec23e7c03d5ba23be9 /lib/std/widestrs.nim | |
parent | a70d3abd37819e7562f80f0c808788c3f5c62c55 (diff) | |
download | Nim-658b28dc5707601b39d9aad4b6bf79a9afff1e92.tar.gz |
tyInt tyUint fit target int bit width (#20829)
Diffstat (limited to 'lib/std/widestrs.nim')
-rw-r--r-- | lib/std/widestrs.nim | 379 |
1 files changed, 190 insertions, 189 deletions
diff --git a/lib/std/widestrs.nim b/lib/std/widestrs.nim index 7df4004d1..f0f648c8d 100644 --- a/lib/std/widestrs.nim +++ b/lib/std/widestrs.nim @@ -15,214 +15,215 @@ type Utf16Char* = distinct int16 -when defined(nimv2): +when not (defined(cpu16) or defined(cpu8)): + when defined(nimv2): - type - WideCString* = ptr UncheckedArray[Utf16Char] + type + WideCString* = ptr UncheckedArray[Utf16Char] - WideCStringObj* = object - bytes: int - data: WideCString + WideCStringObj* = object + bytes: int + data: WideCString - proc `=destroy`(a: var WideCStringObj) = - if a.data != nil: + proc `=destroy`(a: var WideCStringObj) = + if a.data != nil: + when compileOption("threads"): + deallocShared(a.data) + else: + dealloc(a.data) + + proc `=copy`(a: var WideCStringObj; b: WideCStringObj) {.error.} + + proc `=sink`(a: var WideCStringObj; b: WideCStringObj) = + a.bytes = b.bytes + a.data = b.data + + proc createWide(a: var WideCStringObj; bytes: int) = + a.bytes = bytes when compileOption("threads"): - deallocShared(a.data) + a.data = cast[typeof(a.data)](allocShared0(bytes)) else: - dealloc(a.data) + a.data = cast[typeof(a.data)](alloc0(bytes)) - proc `=copy`(a: var WideCStringObj; b: WideCStringObj) {.error.} + template `[]`*(a: WideCStringObj; idx: int): Utf16Char = a.data[idx] + template `[]=`*(a: WideCStringObj; idx: int; val: Utf16Char) = a.data[idx] = val - proc `=sink`(a: var WideCStringObj; b: WideCStringObj) = - a.bytes = b.bytes - a.data = b.data + template nullWide(): untyped = WideCStringObj(bytes: 0, data: nil) + + converter toWideCString*(x: WideCStringObj): WideCString {.inline.} = + result = x.data - proc createWide(a: var WideCStringObj; bytes: int) = - a.bytes = bytes - when compileOption("threads"): - a.data = cast[typeof(a.data)](allocShared0(bytes)) - else: - a.data = cast[typeof(a.data)](alloc0(bytes)) - - template `[]`*(a: WideCStringObj; idx: int): Utf16Char = a.data[idx] - template `[]=`*(a: WideCStringObj; idx: int; val: Utf16Char) = a.data[idx] = val - - template nullWide(): untyped = WideCStringObj(bytes: 0, data: nil) - - converter toWideCString*(x: WideCStringObj): WideCString {.inline.} = - result = x.data - -else: - template nullWide(): untyped = nil - - type - WideCString* = ref UncheckedArray[Utf16Char] - WideCStringObj* = WideCString - - template createWide(a; L) = - unsafeNew(a, L) - -proc ord(arg: Utf16Char): int = int(cast[uint16](arg)) - -proc len*(w: WideCString): int = - ## returns the length of a widestring. This traverses the whole string to - ## find the binary zero end marker! - result = 0 - while int16(w[result]) != 0'i16: inc result - -const - UNI_REPLACEMENT_CHAR = Utf16Char(0xFFFD'i16) - UNI_MAX_BMP = 0x0000FFFF - UNI_MAX_UTF16 = 0x0010FFFF - # UNI_MAX_UTF32 = 0x7FFFFFFF - # UNI_MAX_LEGAL_UTF32 = 0x0010FFFF - - halfShift = 10 - halfBase = 0x0010000 - halfMask = 0x3FF - - UNI_SUR_HIGH_START = 0xD800 - UNI_SUR_HIGH_END = 0xDBFF - UNI_SUR_LOW_START = 0xDC00 - UNI_SUR_LOW_END = 0xDFFF - UNI_REPL = 0xFFFD - -template ones(n: untyped): untyped = ((1 shl n)-1) - -template fastRuneAt(s: cstring, i, L: int, result: untyped, doInc = true) = - ## Returns the unicode character `s[i]` in `result`. If `doInc == true` - ## `i` is incremented by the number of bytes that have been processed. - bind ones - - if ord(s[i]) <= 127: - result = ord(s[i]) - when doInc: inc(i) - elif ord(s[i]) shr 5 == 0b110: - #assert(ord(s[i+1]) shr 6 == 0b10) - if i <= L - 2: - result = (ord(s[i]) and (ones(5))) shl 6 or (ord(s[i+1]) and ones(6)) - when doInc: inc(i, 2) - else: - result = UNI_REPL - when doInc: inc(i) - elif ord(s[i]) shr 4 == 0b1110: - if i <= L - 3: - #assert(ord(s[i+1]) shr 6 == 0b10) - #assert(ord(s[i+2]) shr 6 == 0b10) - result = (ord(s[i]) and ones(4)) shl 12 or - (ord(s[i+1]) and ones(6)) shl 6 or - (ord(s[i+2]) and ones(6)) - when doInc: inc(i, 3) - else: - result = UNI_REPL - when doInc: inc(i) - elif ord(s[i]) shr 3 == 0b11110: - if i <= L - 4: - #assert(ord(s[i+1]) shr 6 == 0b10) - #assert(ord(s[i+2]) shr 6 == 0b10) - #assert(ord(s[i+3]) shr 6 == 0b10) - result = (ord(s[i]) and ones(3)) shl 18 or - (ord(s[i+1]) and ones(6)) shl 12 or - (ord(s[i+2]) and ones(6)) shl 6 or - (ord(s[i+3]) and ones(6)) - when doInc: inc(i, 4) - else: - result = UNI_REPL - when doInc: inc(i) else: - result = 0xFFFD - when doInc: inc(i) - -iterator runes(s: cstring, L: int): int = - var - i = 0 - result: int - while i < L: - fastRuneAt(s, i, L, result, true) - yield result - -proc newWideCString*(size: int): WideCStringObj = - createWide(result, size * 2 + 2) - -proc newWideCString*(source: cstring, L: int): WideCStringObj = - createWide(result, L * 2 + 2) - var d = 0 - for ch in runes(source, L): - - if ch <= UNI_MAX_BMP: - if ch >= UNI_SUR_HIGH_START and ch <= UNI_SUR_LOW_END: - result[d] = UNI_REPLACEMENT_CHAR - else: - result[d] = cast[Utf16Char](uint16(ch)) - elif ch > UNI_MAX_UTF16: - result[d] = UNI_REPLACEMENT_CHAR - else: - let ch = ch - halfBase - result[d] = cast[Utf16Char](uint16((ch shr halfShift) + UNI_SUR_HIGH_START)) - inc d - result[d] = cast[Utf16Char](uint16((ch and halfMask) + UNI_SUR_LOW_START)) - inc d - result[d] = Utf16Char(0) + template nullWide(): untyped = nil -proc newWideCString*(s: cstring): WideCStringObj = - if s.isNil: return nullWide + type + WideCString* = ref UncheckedArray[Utf16Char] + WideCStringObj* = WideCString - result = newWideCString(s, s.len) + template createWide(a; L) = + unsafeNew(a, L) -proc newWideCString*(s: string): WideCStringObj = - result = newWideCString(cstring s, s.len) + proc ord(arg: Utf16Char): int = int(cast[uint16](arg)) -proc `$`*(w: WideCString, estimate: int, replacement: int = 0xFFFD): string = - result = newStringOfCap(estimate + estimate shr 2) + proc len*(w: WideCString): int = + ## returns the length of a widestring. This traverses the whole string to + ## find the binary zero end marker! + result = 0 + while int16(w[result]) != 0'i16: inc result - var i = 0 - while w[i].int16 != 0'i16: - var ch = ord(w[i]) - inc i - if ch >= UNI_SUR_HIGH_START and ch <= UNI_SUR_HIGH_END: - # If the 16 bits following the high surrogate are in the source buffer... - let ch2 = ord(w[i]) + const + UNI_REPLACEMENT_CHAR = Utf16Char(0xFFFD'i16) + UNI_MAX_BMP = 0x0000FFFF + UNI_MAX_UTF16 = 0x0010FFFF + # UNI_MAX_UTF32 = 0x7FFFFFFF + # UNI_MAX_LEGAL_UTF32 = 0x0010FFFF - # If it's a low surrogate, convert to UTF32: - if ch2 >= UNI_SUR_LOW_START and ch2 <= UNI_SUR_LOW_END: - ch = (((ch and halfMask) shl halfShift) + (ch2 and halfMask)) + halfBase - inc i + halfShift = 10 + halfBase = 0x0010000 + halfMask = 0x3FF + + UNI_SUR_HIGH_START = 0xD800 + UNI_SUR_HIGH_END = 0xDBFF + UNI_SUR_LOW_START = 0xDC00 + UNI_SUR_LOW_END = 0xDFFF + UNI_REPL = 0xFFFD + + template ones(n: untyped): untyped = ((1 shl n)-1) + + template fastRuneAt(s: cstring, i, L: int, result: untyped, doInc = true) = + ## Returns the unicode character `s[i]` in `result`. If `doInc == true` + ## `i` is incremented by the number of bytes that have been processed. + bind ones + + if ord(s[i]) <= 127: + result = ord(s[i]) + when doInc: inc(i) + elif ord(s[i]) shr 5 == 0b110: + #assert(ord(s[i+1]) shr 6 == 0b10) + if i <= L - 2: + result = (ord(s[i]) and (ones(5))) shl 6 or (ord(s[i+1]) and ones(6)) + when doInc: inc(i, 2) + else: + result = UNI_REPL + when doInc: inc(i) + elif ord(s[i]) shr 4 == 0b1110: + if i <= L - 3: + #assert(ord(s[i+1]) shr 6 == 0b10) + #assert(ord(s[i+2]) shr 6 == 0b10) + result = (ord(s[i]) and ones(4)) shl 12 or + (ord(s[i+1]) and ones(6)) shl 6 or + (ord(s[i+2]) and ones(6)) + when doInc: inc(i, 3) else: + result = UNI_REPL + when doInc: inc(i) + elif ord(s[i]) shr 3 == 0b11110: + if i <= L - 4: + #assert(ord(s[i+1]) shr 6 == 0b10) + #assert(ord(s[i+2]) shr 6 == 0b10) + #assert(ord(s[i+3]) shr 6 == 0b10) + result = (ord(s[i]) and ones(3)) shl 18 or + (ord(s[i+1]) and ones(6)) shl 12 or + (ord(s[i+2]) and ones(6)) shl 6 or + (ord(s[i+3]) and ones(6)) + when doInc: inc(i, 4) + else: + result = UNI_REPL + when doInc: inc(i) + else: + result = 0xFFFD + when doInc: inc(i) + + iterator runes(s: cstring, L: int): int = + var + i = 0 + result: int + while i < L: + fastRuneAt(s, i, L, result, true) + yield result + + proc newWideCString*(size: int): WideCStringObj = + createWide(result, size * 2 + 2) + + proc newWideCString*(source: cstring, L: int): WideCStringObj = + createWide(result, L * 2 + 2) + var d = 0 + for ch in runes(source, L): + + if ch <= UNI_MAX_BMP: + if ch >= UNI_SUR_HIGH_START and ch <= UNI_SUR_LOW_END: + result[d] = UNI_REPLACEMENT_CHAR + else: + result[d] = cast[Utf16Char](uint16(ch)) + elif ch > UNI_MAX_UTF16: + result[d] = UNI_REPLACEMENT_CHAR + else: + let ch = ch - halfBase + result[d] = cast[Utf16Char](uint16((ch shr halfShift) + UNI_SUR_HIGH_START)) + inc d + result[d] = cast[Utf16Char](uint16((ch and halfMask) + UNI_SUR_LOW_START)) + inc d + result[d] = Utf16Char(0) + + proc newWideCString*(s: cstring): WideCStringObj = + if s.isNil: return nullWide + + result = newWideCString(s, s.len) + + proc newWideCString*(s: string): WideCStringObj = + result = newWideCString(cstring s, s.len) + + proc `$`*(w: WideCString, estimate: int, replacement: int = 0xFFFD): string = + result = newStringOfCap(estimate + estimate shr 2) + + var i = 0 + while w[i].int16 != 0'i16: + var ch = ord(w[i]) + inc i + if ch >= UNI_SUR_HIGH_START and ch <= UNI_SUR_HIGH_END: + # If the 16 bits following the high surrogate are in the source buffer... + let ch2 = ord(w[i]) + + # If it's a low surrogate, convert to UTF32: + if ch2 >= UNI_SUR_LOW_START and ch2 <= UNI_SUR_LOW_END: + ch = (((ch and halfMask) shl halfShift) + (ch2 and halfMask)) + halfBase + inc i + else: + #invalid UTF-16 + ch = replacement + elif ch >= UNI_SUR_LOW_START and ch <= UNI_SUR_LOW_END: #invalid UTF-16 ch = replacement - elif ch >= UNI_SUR_LOW_START and ch <= UNI_SUR_LOW_END: - #invalid UTF-16 - ch = replacement - - if ch < 0x80: - result.add chr(ch) - elif ch < 0x800: - result.add chr((ch shr 6) or 0xc0) - result.add chr((ch and 0x3f) or 0x80) - elif ch < 0x10000: - result.add chr((ch shr 12) or 0xe0) - result.add chr(((ch shr 6) and 0x3f) or 0x80) - result.add chr((ch and 0x3f) or 0x80) - elif ch <= 0x10FFFF: - result.add chr((ch shr 18) or 0xf0) - result.add chr(((ch shr 12) and 0x3f) or 0x80) - result.add chr(((ch shr 6) and 0x3f) or 0x80) - result.add chr((ch and 0x3f) or 0x80) - else: - # replacement char(in case user give very large number): - result.add chr(0xFFFD shr 12 or 0b1110_0000) - result.add chr(0xFFFD shr 6 and ones(6) or 0b10_0000_00) - result.add chr(0xFFFD and ones(6) or 0b10_0000_00) -proc `$`*(s: WideCString): string = - result = s $ 80 + if ch < 0x80: + result.add chr(ch) + elif ch < 0x800: + result.add chr((ch shr 6) or 0xc0) + result.add chr((ch and 0x3f) or 0x80) + elif ch < 0x10000: + result.add chr((ch shr 12) or 0xe0) + result.add chr(((ch shr 6) and 0x3f) or 0x80) + result.add chr((ch and 0x3f) or 0x80) + elif ch <= 0x10FFFF: + result.add chr((ch shr 18) or 0xf0) + result.add chr(((ch shr 12) and 0x3f) or 0x80) + result.add chr(((ch shr 6) and 0x3f) or 0x80) + result.add chr((ch and 0x3f) or 0x80) + else: + # replacement char(in case user give very large number): + result.add chr(0xFFFD shr 12 or 0b1110_0000) + result.add chr(0xFFFD shr 6 and ones(6) or 0b10_0000_00) + result.add chr(0xFFFD and ones(6) or 0b10_0000_00) + + proc `$`*(s: WideCString): string = + result = s $ 80 -when defined(nimv2): - proc `$`*(s: WideCStringObj, estimate: int, replacement: int = 0xFFFD): string = - `$`(s.data, estimate, replacement) + when defined(nimv2): + proc `$`*(s: WideCStringObj, estimate: int, replacement: int = 0xFFFD): string = + `$`(s.data, estimate, replacement) - proc `$`*(s: WideCStringObj): string = - $(s.data) + proc `$`*(s: WideCStringObj): string = + $(s.data) - proc len*(w: WideCStringObj): int {.inline.} = - len(w.data) + proc len*(w: WideCStringObj): int {.inline.} = + len(w.data) |