summary refs log tree commit diff stats
path: root/lib/system/widestrs.nim
blob: 8b08959b5c02b2103777beb7b33f1cab4d046edb (plain) (blame)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
pre { line-height: 125%; }
td.linenos .normal { color: inherit; background-color: transparent; padding-left: 5px; padding-right: 5px; }
span.linenos { color: inherit; background-color: transparent; padding-left: 5px; padding-right: 5px; }
td.linenos .special { color: #000000; background-color: #ffffc0; padding-left: 5px; padding-right: 5px; }
span.linenos.special { color: #000000; background-color: #ffffc0; padding-left: 5px; padding-right: 5px; }
.highlight .hll { background-color: #ffffcc }
.highlight .c { color: #888888 } /* Comment */
.highlight .err { color: #a61717; background-color: #e3d2d2 } /* Error */
.highlight .k { color: #008800; font-weight: bold } /* Keyword */
.highlight .ch { color: #888888 } /* Comment.Hashbang */
.highlight .cm { color: #888888 } /* Comment.Multiline */
.highlight .cp { color: #cc0000; font-weight: bold } /* Comment.Preproc */
.highlight .cpf { color: #888888 } /* Comment.PreprocFile */
.highlight .c1 { color: #888888 } /* Comment.Single */
.highlight .cs { color: #cc0000; font-weight: bold; background-color: #fff0f0 } /* Comment.Special */
.highlight .gd { color: #000000; background-color: #ffdddd } /* Generic.Deleted */
.highlight .ge { font-style: italic } /* Generic.Emph */
.highlight .ges { font-weight: bold; font-style: italic } /* Generic.EmphStrong */
.highlight .gr { color: #aa0000 } /* Generic.Error */
.highlight .gh { color: #333333 } /* Generic.Heading */
.highlight .gi { color: #000000; background-color: #ddffdd } /* Generic.Inserted */
.highlight .go { color: #888888 } /* Generic.Output */
.highlight .gp { color: #555555 } /* Generic.Prompt */
.highlight .gs { font-weight: bold } /* Generic.Strong */
.highlight .gu { color: #666666 } /* Generic.Subheading */
.highlight .gt { color: #aa0000 } /* Generic.Traceback */
.highlight .kc { color: #008800; font-weight: bold } /* Keyword.Constant */
.highlight .kd { color: #008800; font-weight: bold } /* Keyword.Declaration */
.highlight .kn { color: #008800; font-weight: bold } /* Keyword.Namespace */
.highlight .kp { color: #008800 } /* Keyword.Pseudo */
.highlight .kr { color: #008800; font-weight: bold } /* Keyword.Reserved */
.highlight .kt { color: #888888; font-weight: bold } /* Keyword.Type */
.highlight .m { color: #0000DD; font-weight: bold } /* Literal.Number */
.highlight .s { color: #dd2200; background-color: #fff0f0 } /* Literal.String */
.highlight .na { color: #336699 } /* Name.Attribute */
.highlight .nb { color: #003388 } /* Name.Builtin */
.highlight .nc { color: #bb0066; font-weight: bold } /* Name.Class */
.highlight .no { color: #003366; font-weight: bold } /* Name.Constant */
.highlight .nd { color: #555555 } /* Name.Decorator */
.highlight .ne { color: #bb0066; font-weight: bold } /* Name.Exception */
.highlight .nf { color: #0066bb; font-weight: bold } /* Name.Function */
.highlight .nl { color: #336699; font-style: italic } /* Name.Label */
.highlight .nn { color: #bb0066; font-weight: bold } /* Name.Namespace */
.highlight .py { color: #336699; font-weight: bold } /* Name.Property */
.highlight .nt { color: #bb0066; font-weight: bold } /* Name.Tag */
.highlight .nv { color: #336699 } /* Name.Variable */
.highlight .ow { color: #008800 } /* Operator.Word */
.highlight .w { color: #bbbbbb } /* Text.Whitespace */
.highlight .mb { color: #0000DD; font-weight: bold } /* Literal.Number.Bin */
.highlight .mf { color: #0000DD; font-weight: bold } /* Literal.Number.Float */
.highlight .mh { color: #0000DD; font-weight: bold } /* Literal.Number.Hex */
.highlight .mi { color: #0000DD; font-weight: bold } /* Literal.Number.Integer */
.highlight .mo { color: #0000DD; font-weight: bold } /* Literal.Number.Oct */
.highlight .sa { color: #dd2200; background-color: #fff0f0 } /* Literal.String.Affix */
.highlight .sb { color: #dd2200; background-color: #fff0f0 } /* Literal.String.Backtick */
.highlight .sc { color: #dd2200; background-color: #fff0f0 } /* Literal.String.Char */
.highlight .dl { color: #dd2200; background-color: #fff0f0 } /* Literal.String.Delimiter */
.highlight .sd { color: #dd2200; background-color: #fff0f0 } /* Literal.String.Doc */
.highlight .s2 { color: #dd2200; background-color: #fff0f0 } /* Literal.String.Double */
.highlight .se { color: #0044dd; background-color: #fff0f0 } /* Literal.String.Escape */
.highlight .sh { color: #dd2200; background-color: #fff0f0 } /* Literal.String.Heredoc */
.highlight .si { color: #3333bb; background-color: #fff0f0 } /* Literal.String.Interpol */
.highlight .sx { color: #22bb22; background-color: #f0fff0 } /* Literal.String.Other */
.highlight .sr { color: #008800; background-color: #fff0ff } /* Literal.String.Regex */
.highlight .s1 { color: #dd2200; background-color: #fff0f0 } /* Literal.String.Single */
.highlight .ss { color: #aa6600; background-color: #fff0f0 } /* Literal.String.Symbol */
.highlight .bp { color: #003388 } /* Name.Builtin.Pseudo */
.highlight .fm { color: #0066bb; font-weight: bold } /* Name.Function.Magic */
.highlight .vc { color: #336699 } /* Name.Variable.Class */
.highlight .vg { color: #dd7700 } /* Name.Variable.Global */
.highlight .vi { color: #3333bb } /* Name.Variable.Instance */
.highlight .vm { color: #336699 } /* Name.Variable.Magic */
.highlight .il { color: #0000DD; font-weight: bold } /* Literal.Number.Integer.Long */
parse/0: instruction: copy
parse/0:   ingredient: {name: "23", value: 0, type: 0, properties: ["23": "literal"]}
parse/0:   product: {name: "1", value: 0, type: 1, properties: ["1": "integer"]}
d='n202' href='#n202'>202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226
#
#
#            Nim's Runtime Library
#        (c) Copyright 2012 Andreas Rumpf
#
#    See the file "copying.txt", included in this
#    distribution, for details about the copyright.
#

# Nim support for C/C++'s `wide strings`:idx:. This is part of the system
# module! Do not import it directly!

#when not declared(ThisIsSystem):
#  {.error: "You must not import this module explicitly".}

type
  Utf16Char* = distinct int16 ## A single UTF-16 code unit (16 bits wide).

when defined(nimv2):

  type
    # Raw pointer to an array of UTF-16 code units. `len` and `$` in this
    # module read it up to a terminating zero unit.
    WideCString* = ptr UncheckedArray[Utf16Char]

    # Owner of a `WideCString` buffer: the `=destroy` hook of this type
    # frees `data`, and copying is rejected at compile time.
    WideCStringObj* = object
      bytes: int         # byte size that was passed to `createWide`
      data: WideCString  # the buffer itself; nil when nothing is owned

  proc `=destroy`(a: var WideCStringObj) =
    ## Frees the buffer held by `a`. The deallocator mirrors the allocator
    ## picked by `createWide` for the current `threads` setting.
    # An object that owns nothing has nothing to release.
    if a.data == nil: return
    when compileOption("threads"):
      deallocShared(a.data)
    else:
      dealloc(a.data)

  # Copying is a compile-time error: the object owns a manually allocated
  # buffer, so values of this type can only be moved.
  proc `=copy`(a: var WideCStringObj; b: WideCStringObj) {.error.}

  proc `=sink`(a: var WideCStringObj; b: WideCStringObj) =
    ## Moves `b` into `a`: afterwards `a` owns the buffer that `b` held.
    ##
    ## A buffer already owned by `a` is released first. Overwriting the
    ## pointer without freeing it would leak the old allocation whenever a
    ## non-empty destination is assigned to.
    # The identity check keeps a self-move from freeing the very buffer
    # that is about to be taken over.
    if a.data != nil and a.data != b.data:
      `=destroy`(a)
    a.bytes = b.bytes
    a.data = b.data

  proc createWide(a: var WideCStringObj; bytes: int) =
    ## Gives `a` a fresh zero-filled buffer of `bytes` bytes and records the
    ## size. Any buffer `a` held before is not released here.
    a.bytes = bytes
    # `when` opens no scope, so `buf` is visible after either branch.
    when compileOption("threads"):
      let buf = allocShared0(bytes)
    else:
      let buf = alloc0(bytes)
    a.data = cast[typeof(a.data)](buf)

  # Element access on the owned buffer. `data` is an `UncheckedArray`, so
  # neither template performs a bounds check.
  template `[]`*(a: WideCStringObj; idx: int): Utf16Char = a.data[idx]
  template `[]=`*(a: WideCStringObj; idx: int; val: Utf16Char) = a.data[idx] = val

  # The "no string" value for this branch: an object that owns no buffer.
  template nullWide(): untyped = WideCStringObj(bytes: 0, data: nil)

  converter toWideCString*(x: WideCStringObj): WideCString {.inline.} =
    ## Implicit conversion to the raw buffer pointer held by `x`.
    ##
    ## Ownership is not transferred: the pointer is only usable as long as
    ## `x` has not been destroyed.
    x.data

else:
  # The "no string" value for this branch: a plain nil reference.
  template nullWide(): untyped = nil

  type
    # In this branch the buffer is a `ref`, and `WideCStringObj` is merely
    # an alias of `WideCString`.
    WideCString* = ref UncheckedArray[Utf16Char]
    WideCStringObj* = WideCString

  # Allocates `a` with a size of `L` bytes through `unsafeNew`. It takes the
  # same arguments as the `nimv2` proc of the same name, so callers are
  # written once for both branches.
  template createWide(a; L) =
    unsafeNew(a, L)

proc ord(arg: Utf16Char): int =
  ## Numeric value of a UTF-16 code unit in the range `0..0xFFFF`.
  # Reinterpret as unsigned first so that units >= 0x8000 do not come out
  # negative.
  let unsignedUnit = cast[uint16](arg)
  result = int(unsignedUnit)

proc len*(w: WideCString): int =
  ## Returns the length of a widestring in UTF-16 code units, not counting
  ## the terminator. This traverses the whole string to find the binary zero
  ## end marker!
  ##
  ## A nil widestring has length 0.
  result = 0
  # `newWideCString(cstring(nil))` yields a nil widestring, so nil is a
  # value callers can legitimately pass in; it must not be dereferenced.
  if w.isNil: return
  while int16(w[result]) != 0'i16: inc result

const
  # U+FFFD as a UTF-16 code unit; written by `newWideCString` for code points
  # it cannot represent.
  UNI_REPLACEMENT_CHAR = Utf16Char(0xFFFD'i16)
  # Largest code point that fits into a single UTF-16 code unit.
  UNI_MAX_BMP = 0x0000FFFF
  # Largest code point `newWideCString` encodes; anything above is replaced.
  UNI_MAX_UTF16 = 0x0010FFFF
  # UNI_MAX_UTF32 = 0x7FFFFFFF
  # UNI_MAX_LEGAL_UTF32 = 0x0010FFFF

  # Surrogate pair arithmetic: a code point above the BMP has `halfBase`
  # subtracted and is then split into two `halfShift`-bit halves, each
  # selected with `halfMask`.
  halfShift = 10
  halfBase = 0x0010000
  halfMask = 0x3FF

  # Inclusive bounds of the high (first) and low (second) surrogate ranges.
  UNI_SUR_HIGH_START = 0xD800
  UNI_SUR_HIGH_END = 0xDBFF
  UNI_SUR_LOW_START = 0xDC00
  UNI_SUR_LOW_END = 0xDFFF
  # U+FFFD as a plain integer; produced by `fastRuneAt` for truncated
  # sequences.
  UNI_REPL = 0xFFFD

template ones(n: untyped): untyped =
  ## Bit mask with the `n` lowest bits set, e.g. `ones(3) == 0b111`.
  (1 shl n) - 1

template fastRuneAt(s: cstring, i, L: int, result: untyped, doInc = true) =
  ## Decodes the UTF-8 sequence that starts at `s[i]` and stores the code
  ## point in `result`. `L` is the number of valid bytes in `s`.
  ##
  ## With `doInc == true`, `i` is advanced by the number of bytes consumed.
  ## A lead byte whose sequence would run past `L`, or a byte that is not a
  ## valid lead byte, yields U+FFFD and consumes a single byte. Continuation
  ## bytes are not validated.
  bind ones

  if ord(s[i]) <= 127:
    # Plain ASCII.
    result = ord(s[i])
    when doInc: inc(i)
  elif ord(s[i]) shr 5 == 0b110:
    # Two byte sequence: 110xxxxx 10xxxxxx
    if i > L - 2:
      result = UNI_REPL
      when doInc: inc(i)
    else:
      result = ((ord(s[i]) and ones(5)) shl 6) or
               (ord(s[i+1]) and ones(6))
      when doInc: inc(i, 2)
  elif ord(s[i]) shr 4 == 0b1110:
    # Three byte sequence: 1110xxxx 10xxxxxx 10xxxxxx
    if i > L - 3:
      result = UNI_REPL
      when doInc: inc(i)
    else:
      result = ((ord(s[i]) and ones(4)) shl 12) or
               ((ord(s[i+1]) and ones(6)) shl 6) or
               (ord(s[i+2]) and ones(6))
      when doInc: inc(i, 3)
  elif ord(s[i]) shr 3 == 0b11110:
    # Four byte sequence: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
    if i > L - 4:
      result = UNI_REPL
      when doInc: inc(i)
    else:
      result = ((ord(s[i]) and ones(3)) shl 18) or
               ((ord(s[i+1]) and ones(6)) shl 12) or
               ((ord(s[i+2]) and ones(6)) shl 6) or
               (ord(s[i+3]) and ones(6))
      when doInc: inc(i, 4)
  else:
    # Stray continuation byte or unsupported lead byte.
    result = UNI_REPL
    when doInc: inc(i)

iterator runes(s: cstring, L: int): int =
  ## Yields, in order, the code points decoded from the first `L` bytes of
  ## the UTF-8 string `s`.
  var pos = 0
  var rune = 0
  while pos < L:
    # `fastRuneAt` advances `pos` past the bytes it consumed.
    fastRuneAt(s, pos, L, rune, true)
    yield rune

proc newWideCString*(size: int): WideCStringObj =
  ## Creates a widestring with room for `size` UTF-16 code units plus the
  ## terminating zero.
  let byteCount = (size + 1) * 2  # two bytes per unit, terminator included
  createWide(result, byteCount)

proc newWideCString*(source: cstring, L: int): WideCStringObj =
  ## Converts the first `L` bytes of the UTF-8 string `source` into a
  ## zero-terminated UTF-16 widestring.
  ##
  ## Code points above the BMP are stored as a surrogate pair. Code points
  ## inside the surrogate range or above U+10FFFF are stored as U+FFFD.
  # Room for one code unit per input byte plus the terminator.
  createWide(result, L * 2 + 2)
  var dst = 0
  for cp in runes(source, L):
    if cp > UNI_MAX_UTF16:
      # Not encodable in UTF-16 at all.
      result[dst] = UNI_REPLACEMENT_CHAR
    elif cp > UNI_MAX_BMP:
      # Supplementary plane: emit high surrogate, then low surrogate.
      let offset = cp - halfBase
      let hi = (offset shr halfShift) + UNI_SUR_HIGH_START
      let lo = (offset and halfMask) + UNI_SUR_LOW_START
      result[dst] = cast[Utf16Char](uint16(hi))
      inc dst
      result[dst] = cast[Utf16Char](uint16(lo))
    elif cp >= UNI_SUR_HIGH_START and cp <= UNI_SUR_LOW_END:
      # A lone surrogate is not a valid code point.
      result[dst] = UNI_REPLACEMENT_CHAR
    else:
      result[dst] = cast[Utf16Char](uint16(cp))
    inc dst
  result[dst] = Utf16Char(0)

proc newWideCString*(s: cstring): WideCStringObj =
  ## Converts the zero-terminated UTF-8 string `s` to a widestring.
  ## A nil `s` gives the null widestring.
  if not s.isNil:
    result = newWideCString(s, s.len)
  else:
    result = nullWide

proc newWideCString*(s: string): WideCStringObj =
  ## Converts the UTF-8 string `s` to a widestring, using all `s.len` bytes.
  newWideCString(cstring(s), len(s))

proc `$`*(w: WideCString, estimate: int, replacement: int = 0xFFFD): string =
  ## Converts the zero-terminated UTF-16 string `w` to a UTF-8 `string`.
  ##
  ## * `estimate` is the expected number of code units in `w`; it only sizes
  ##   the initial capacity of the result.
  ## * `replacement` is the code point written for every unpaired surrogate.
  ##   A value outside `0..0x10FFFF` is written as U+FFFD instead.
  ##
  ## A nil `w` gives an empty string.
  result = newStringOfCap(estimate + estimate shr 2)
  # `newWideCString(cstring(nil))` yields a nil widestring, so nil is a
  # value callers can legitimately pass in; it must not be dereferenced.
  if w.isNil: return

  var i = 0
  while w[i].int16 != 0'i16:
    var ch = ord(w[i])
    inc i
    if ch >= UNI_SUR_HIGH_START and ch <= UNI_SUR_HIGH_END:
      # Peeking at the next unit is safe: at worst it is the terminator,
      # which is not a low surrogate and is left for the loop condition.
      let ch2 = ord(w[i])

      # If it's a low surrogate, convert to UTF32:
      if ch2 >= UNI_SUR_LOW_START and ch2 <= UNI_SUR_LOW_END:
        ch = (((ch and halfMask) shl halfShift) + (ch2 and halfMask)) + halfBase
        inc i
      else:
        # invalid UTF-16: high surrogate without its partner
        ch = replacement
    elif ch >= UNI_SUR_LOW_START and ch <= UNI_SUR_LOW_END:
      # invalid UTF-16: low surrogate without a preceding high surrogate
      ch = replacement

    if ch < 0 or ch > 0x10FFFF:
      # Only reachable through a bad `replacement`. A negative value used to
      # reach `chr` and fail its range check; fall back to U+FFFD, whose
      # UTF-8 encoding is EF BF BD.
      result.add '\xEF'
      result.add '\xBF'
      result.add '\xBD'
    elif ch < 0x80:
      result.add chr(ch)
    elif ch < 0x800:
      result.add chr((ch shr 6) or 0xc0)
      result.add chr((ch and 0x3f) or 0x80)
    elif ch < 0x10000:
      result.add chr((ch shr 12) or 0xe0)
      result.add chr(((ch shr 6) and 0x3f) or 0x80)
      result.add chr((ch and 0x3f) or 0x80)
    else:
      result.add chr((ch shr 18) or 0xf0)
      result.add chr(((ch shr 12) and 0x3f) or 0x80)
      result.add chr(((ch shr 6) and 0x3f) or 0x80)
      result.add chr((ch and 0x3f) or 0x80)

proc `$`*(s: WideCString): string =
  ## Converts `s` to a UTF-8 string, starting from a length estimate of 80
  ## code units and the default replacement character.
  `$`(s, 80)

when defined(nimv2):
  proc `$`*(s: WideCStringObj, estimate: int, replacement: int = 0xFFFD): string =
    ## Same as the `WideCString` overload, applied to the buffer owned by `s`.
    result = `$`(s.data, estimate, replacement)

  proc `$`*(s: WideCStringObj): string =
    ## Converts the buffer owned by `s` to a UTF-8 string.
    result = `$`(s.data)