about summary refs log tree commit diff stats
path: root/src/encoding/decoderstream.nim
diff options
context:
space:
mode:
authorbptato <nincsnevem662@gmail.com>2023-08-14 21:38:49 +0200
committerbptato <nincsnevem662@gmail.com>2023-08-14 21:38:49 +0200
commit6b0b7ccfc571b1df8bfbce26703f64e89861f779 (patch)
tree68e87d16b5285b29ab8ecdbbf887821cc1e0b67e /src/encoding/decoderstream.nim
parent5cca932e90387781cfa14ae77d587c25c2e0dcf4 (diff)
downloadchawan-6b0b7ccfc571b1df8bfbce26703f64e89861f779.tar.gz
Move charsets into chakasu
Operation "modularize Chawan somewhat" part 1
Diffstat (limited to 'src/encoding/decoderstream.nim')
-rw-r--r--src/encoding/decoderstream.nim863
1 files changed, 0 insertions, 863 deletions
diff --git a/src/encoding/decoderstream.nim b/src/encoding/decoderstream.nim
deleted file mode 100644
index c78eebdc..00000000
--- a/src/encoding/decoderstream.nim
+++ /dev/null
@@ -1,863 +0,0 @@
-import algorithm
-import streams
-import unicode
-
-import data/charset
-import utils/twtstr
-
-# DecoderStream decodes any encoding to valid utf-32.
-type
-  DecoderErrorMode* = enum # selects what handleError does on undecodable input
-    DECODER_ERROR_MODE_FATAL, DECODER_ERROR_MODE_REPLACEMENT # stop and set `failed` / emit U+FFFD
-
-  ISO2022JPState = enum # states of the ISO-2022-JP decoder state machine
-    STATE_ASCII, STATE_ROMAN, STATE_KATAKANA, STATE_LEAD_BYTE,
-    STATE_TRAIL_BYTE, STATE_ESCAPE_START, STATE_ESCAPE
-
-  DecoderStream* = ref object # incremental decoder: reads bytes from `source`, yields uint32 code points
-    source: Stream # byte stream that readData pulls input from
-    errormode: DecoderErrorMode # consulted by handleError
-    isend: bool # reported by atEnd; set on fatal error or by checkEnd
-    failed*: bool # set by handleError in fatal mode
-    bufs: seq[seq[uint32]] # overflow pages for code points that did not fit the caller's buffer
-    bs: int # read index into the first page (advanced by copyBuffers)
-    bi: int # write index into the last page (advanced by append_codepoint_buf)
-    buflen: int # length of each page, in code points
-    c: uint32 # not referenced by the code visible in this file
-    case charset: Charset # discriminator: per-encoding decoder state lives in the branches below
-    of CHARSET_UTF_8:
-      u8c: uint32 # code point accumulated so far
-      u8needed: int # continuation bytes expected for the current sequence
-      u8seen: int # continuation bytes consumed so far
-      u8bounds: Slice[uint8] # accepted range for the next continuation byte
-    of CHARSET_GBK, CHARSET_GB18030:
-      gb18first: uint8 # pending bytes of a multi-byte sequence; 0 means unset
-      gb18second: uint8
-      gb18third: uint8
-      gb18buf: uint8 # single pushed-back byte, valid when gb18hasbuf is true
-      gb18hasbuf: bool
-    of CHARSET_BIG5:
-      big5lead: uint8 # pending lead byte; 0 means none
-    of CHARSET_EUC_JP:
-      eucjplead: uint8 # pending lead byte; 0 means none
-      eucjpjis0212: bool # set after an 0x8F prefix: next pair is looked up in Jis0212Decode
-    of CHARSET_ISO_2022_JP:
-      iso2022jplead: uint8 # pending lead byte or first byte of an escape sequence
-      iso2022jpstate: ISO2022JPState # current decoder state
-      iso2022jpoutputstate: ISO2022JPState # state restored when an escape sequence is rejected
-      iso2022jpoutput: bool # true right after an accepted escape; cleared on other input
-      iso2022jpbuf: uint8 # single pushed-back byte, valid when iso2022jphasbuf is true
-      iso2022jphasbuf: bool
-    of CHARSET_SHIFT_JIS:
-      sjislead: uint8 # pending lead byte; 0 means none
-    of CHARSET_EUC_KR:
-      euckrlead: uint8 # pending lead byte; 0 means none
-    of CHARSET_UTF_16_BE, CHARSET_UTF_16_LE:
-      u16lead: uint8 # first byte of a code unit, valid when u16haslead is true
-      u16surr: uint16 # pending lead surrogate, valid when u16hassurr is true
-      u16haslead: bool
-      u16hassurr: bool
-    of CHARSET_REPLACEMENT:
-      replreported: bool # true once decodeReplacement has reported its single error
-    else: discard
-
-# Append `c` to the overflow pages, opening a fresh page of `buflen` code
-# points whenever the write index has reached the end of the last page.
-template append_codepoint_buf(stream: DecoderStream, c: uint32) =
-  if stream.buflen <= stream.bi:
-    stream.bufs.add newSeqUninitialized[uint32](stream.buflen)
-    stream.bi = 0
-  stream.bufs[stream.bufs.high][stream.bi] = c
-  stream.bi += 1
-
-# Emit one code point: straight into the caller's buffer `oq` while the byte
-# offset `n` is below `olen`, otherwise into the stream's overflow pages.
-# `n` and `olen` are byte counts; `n` grows by sizeof(uint32) per stored value.
-template append_codepoint(stream: DecoderStream, c: uint32, oq: ptr UncheckedArray[uint32], olen: int, n: var int) =
-  if n >= olen:
-    append_codepoint_buf stream, c
-  else:
-    oq[n div sizeof(uint32)] = c
-    n += sizeof(uint32)
-
-# Convenience overload: emit a `char` as the code point with the same value.
-template append_codepoint(stream: DecoderStream, c: char, oq: ptr UncheckedArray[uint32], olen: int, n: var int) =
-  append_codepoint(stream, uint32(c), oq, olen, n)
-
-proc handleError(stream: DecoderStream, oq: ptr UncheckedArray[uint32], olen: int, n: var int) =
-  ## Reacts to a decoding error according to `stream.errormode`: in fatal
-  ## mode the stream is flagged as failed and ended, in replacement mode
-  ## U+FFFD is emitted in place of the bad input.
-  if stream.errormode == DECODER_ERROR_MODE_FATAL:
-    stream.isend = true
-    stream.failed = true
-  else:
-    # DECODER_ERROR_MODE_REPLACEMENT
-    stream.append_codepoint 0xFFFD, oq, olen, n
-
-proc decodeUTF8(stream: DecoderStream, iq: openArray[uint8],
-    oq: ptr UncheckedArray[uint32], olen: int, n: var int) =
-  ## Decodes the UTF-8 bytes in `iq`, emitting code points through
-  ## `append_codepoint` (to `oq`, or to the overflow pages once `n` reaches
-  ## `olen`). A sequence may span calls: the partial state is loaded from and
-  ## stored back to `stream`. Invalid input goes through `handleError`; in
-  ## fatal mode decoding stops at the first error.
-  var c = stream.u8c
-  var needed = stream.u8needed
-  var seen = stream.u8seen
-  var bounds = stream.u8bounds
-  var i = 0
-  while i < iq.len:
-    let b = iq[i]
-    if needed == 0:
-      # Expecting a lead byte. Some leads narrow the range accepted for the
-      # first continuation byte (overlong forms, surrogates, > U+10FFFF).
-      case b
-      of 0x00u8 .. 0x7Fu8:
-        stream.append_codepoint uint32(b), oq, olen, n
-      of 0xC2u8 .. 0xDFu8:
-        needed = 1
-        c = uint32(b) and 0x1F
-      of 0xE0u8:
-        bounds.a = 0xA0
-        needed = 2
-        c = uint32(b) and 0xF
-      of 0xEDu8:
-        bounds.b = 0x9F
-        needed = 2
-        c = uint32(b) and 0xF
-      of 0xE1u8 .. 0xECu8, 0xEEu8 .. 0xEFu8:
-        needed = 2
-        c = uint32(b) and 0xF
-      of 0xF0u8:
-        bounds.a = 0x90
-        needed = 3
-        c = uint32(b) and 0x7
-      of 0xF4u8:
-        bounds.b = 0x8F
-        needed = 3
-        c = uint32(b) and 0x7
-      of 0xF1u8 .. 0xF3u8:
-        needed = 3
-        c = uint32(b) and 0x7
-      else:
-        stream.handleError(oq, olen, n)
-        if stream.isend: # fatal error
-          break
-      inc i
-      continue
-    if b notin bounds:
-      # Bad continuation byte: drop the partial sequence, report the error
-      # and re-examine this byte as a potential lead (no inc i).
-      c = 0
-      needed = 0
-      seen = 0
-      bounds = 0x80u8 .. 0xBFu8
-      stream.handleError(oq, olen, n)
-      if stream.isend: # fatal error: do not decode past the failure
-        break
-      continue # prepend (no inc i)
-    bounds = 0x80u8 .. 0xBFu8
-    c = (c shl 6) or (b and 0x3F)
-    inc seen
-    if seen == needed:
-      stream.append_codepoint c, oq, olen, n
-      c = 0
-      needed = 0
-      seen = 0
-    inc i
-  stream.u8c = c
-  stream.u8bounds = bounds
-  stream.u8seen = seen
-  stream.u8needed = needed
-
-proc gb18RangesCodepoint(p: uint32): uint32 =
-  ## Maps a four-byte gb18030 pointer to a code point using the ranges
-  ## table. Returns `high(uint32)` as the "null" marker for pointers that
-  ## have no mapping.
-  if (p > 39419 and p < 189000) or p > 1237575:
-    return high(uint32) # null
-  if p == 7457:
-    return 0xE7C7
-  # Pick the last range whose start pointer is <= p, together with the code
-  # point that range starts at.
-  let (rangeStart, rangeCp) =
-    if p >= 189000:
-      # omitted from the map for storage efficiency
-      (189000u32, 0x10000u32)
-    elif p >= 39394:
-      # Handled separately because upperBound returns the first element
-      # greater than the key OR last on failure, so stepping back by one
-      # would be wrong for e.g. p == 39400.
-      (39394u32, 0xFFE6u32)
-    else:
-      # upperBound yields the first range starting after p; the one before
-      # it is the range containing p.
-      let idx = upperBound(Gb18030RangesDecode, p,
-        func(a: tuple[p, ucs: uint16], b: uint32): int = cmp(uint32(a.p), b))
-      let entry = Gb18030RangesDecode[idx - 1]
-      (uint32(entry.p), uint32(entry.ucs))
-  return rangeCp + p - rangeStart
-
-proc decodeGb18030(stream: DecoderStream, iq: openArray[uint8],
-    oq: ptr UncheckedArray[uint32], olen: int, n: var int) =
-  ## Decodes GBK / gb18030 bytes from `iq`, emitting code points through
-  ## `append_codepoint`. Pending bytes of an unfinished sequence are kept in
-  ## `stream.gb18first/second/third` between calls. Invalid input goes
-  ## through `handleError`; in fatal mode decoding stops at the first error.
-  ##
-  ## When a sequence is rejected, the bytes that have to be re-read are
-  ## handled in place instead of being pushed back: a pending second byte is
-  ## always an ASCII digit (it decodes to itself), a pending third byte is
-  ## always a valid lead byte (it becomes the new `first`), and the current
-  ## byte is re-examined by not advancing `i`. The `gb18buf`/`gb18hasbuf`
-  ## fields are therefore not used by this proc.
-  var first = stream.gb18first
-  var second = stream.gb18second
-  var third = stream.gb18third
-  var i = 0
-  while i < iq.len:
-    let b = iq[i]
-    if third != 0:
-      let f = first
-      let s = second
-      let t = third
-      first = 0
-      second = 0
-      third = 0
-      if b in 0x30u8 .. 0x39u8:
-        # Fourth byte of a four-byte sequence.
-        let p = ((uint32(f) - 0x81) * 10 * 126 * 10) +
-                ((uint32(s) - 0x30) * (10 * 126)) +
-                ((uint32(t) - 0x81) * 10) + uint32(b) - 0x30
-        let c = gb18RangesCodepoint(p)
-        if c == high(uint32): # null
-          stream.handleError(oq, olen, n)
-          if stream.isend: break
-        else:
-          stream.append_codepoint c, oq, olen, n
-      else:
-        # Error for the lead byte, then re-read second, third and b.
-        stream.handleError(oq, olen, n)
-        if stream.isend: break
-        stream.append_codepoint s, oq, olen, n # digit decodes to itself
-        first = t # 0x81..0xFE starts a new sequence
-        continue # re-examine b (no inc i)
-    elif second != 0:
-      if b in 0x81u8 .. 0xFEu8:
-        third = b
-      else:
-        # Error for the lead byte, then re-read second and b.
-        let s = second
-        first = 0
-        second = 0
-        stream.handleError(oq, olen, n)
-        if stream.isend: break
-        stream.append_codepoint s, oq, olen, n # digit decodes to itself
-        continue # re-examine b (no inc i)
-    elif first != 0:
-      if b in 0x30u8 .. 0x39u8:
-        second = b
-      else:
-        let ff = first
-        first = 0
-        # Two-byte sequence: trail bytes are 0x40..0x7E and 0x80..0xFE.
-        if b in 0x40u8 .. 0x7Eu8 or b in 0x80u8 .. 0xFEu8:
-          let offset = if b < 0x7F: 0x40u32 else: 0x41u32
-          let p = (uint16(ff) - 0x81) * 190 + (uint16(b) - offset)
-          if p < Gb18030Decode.len:
-            let c = Gb18030Decode[cast[uint16](p)]
-            stream.append_codepoint uint32(c), oq, olen, n
-            inc i
-            continue
-        # The lead byte is always an error here; an ASCII trail byte is
-        # additionally re-read as a character of its own.
-        stream.handleError(oq, olen, n)
-        if stream.isend: break
-        if cast[char](b) in Ascii:
-          continue # re-examine b (no inc i)
-    elif cast[char](b) in Ascii:
-      stream.append_codepoint b, oq, olen, n
-    elif b == 0x80:
-      stream.append_codepoint 0x20AC, oq, olen, n
-    elif b in 0x81u8 .. 0xFEu8:
-      first = b
-    else:
-      stream.handleError(oq, olen, n)
-      if stream.isend: break
-    inc i
-  stream.gb18first = first
-  stream.gb18second = second
-  stream.gb18third = third
-
-proc decodeBig5(stream: DecoderStream, iq: openArray[uint8],
-    oq: ptr UncheckedArray[uint32], olen: int, n: var int) =
-  ## Decodes Big5 bytes from `iq`, emitting code points through
-  ## `append_codepoint`. A pending lead byte is kept in `stream.big5lead`
-  ## between bytes and between calls. Invalid input goes through
-  ## `handleError`; in fatal mode decoding stops at the first error.
-  for b in iq:
-    if stream.big5lead != 0:
-      let lead = uint32(stream.big5lead)
-      stream.big5lead = 0
-      let offset = if b < 0x7F: 0x40u16 else: 0x62u16
-      if b in {0x40u8 .. 0x7Eu8, 0xA1u8 .. 0xFEu8}:
-        let p = (lead - 0x81) * 157 + uint16(b) - offset
-        template output_two(a, b: uint32) =
-          stream.append_codepoint a, oq, olen, n
-          stream.append_codepoint b, oq, olen, n
-        # Four pointers decode to a base letter plus a combining mark.
-        block no_continue:
-          case p
-          of 1133: output_two 0x00CA, 0x0304
-          of 1135: output_two 0x00CA, 0x030C
-          of 1164: output_two 0x00EA, 0x0304
-          of 1166: output_two 0x00EA, 0x030C
-          else: break no_continue
-          continue
-        # The table starts at Big5DecodeOffset; check the lower bound too so
-        # that the unsigned subtraction below cannot wrap around.
-        if p >= Big5DecodeOffset and p < Big5Decode.len + Big5DecodeOffset:
-          let c = Big5Decode[p - Big5DecodeOffset]
-          if c != 0:
-            stream.append_codepoint c, oq, olen, n
-            continue
-      # No mapping: the lead byte is an error. An ASCII trail byte is still
-      # a character of its own and is emitted after the error.
-      stream.handleError(oq, olen, n)
-      if stream.isend: break
-      if cast[char](b) in Ascii:
-        stream.append_codepoint b, oq, olen, n
-    elif cast[char](b) in Ascii:
-      stream.append_codepoint b, oq, olen, n
-    elif b in 0x81u8 .. 0xFEu8:
-      stream.big5lead = b
-    else:
-      stream.handleError(oq, olen, n)
-      if stream.isend: break
-
-proc decodeEUCJP(stream: DecoderStream, iq: openArray[uint8],
-    oq: ptr UncheckedArray[uint32], olen: int, n: var int) =
-  ## Decodes EUC-JP bytes from `iq`, emitting code points through
-  ## `append_codepoint`. The pending lead byte and the JIS X 0212 flag are
-  ## loaded from and stored back to `stream`, so a sequence may span calls.
-  ## Invalid input goes through `handleError`; in fatal mode decoding stops
-  ## at the first error.
-  var jis0212 = stream.eucjpjis0212
-  var lead = stream.eucjplead
-  for b in iq:
-    if lead == 0x8E and b in 0xA1u8 .. 0xDFu8:
-      # Half-width katakana: 0xA1..0xDF map onto U+FF61..U+FF9F.
-      lead = 0
-      stream.append_codepoint 0xFF61u16 - 0xA1 + uint16(b), oq, olen, n
-    elif lead == 0x8F and b in 0xA1u8 .. 0xFEu8:
-      # Three-byte form: the next pair is looked up in the JIS X 0212 table.
-      jis0212 = true
-      lead = b
-    elif lead != 0:
-      let l = lead
-      lead = 0
-      var c: uint16
-      if l in 0xA1u8 .. 0xFEu8 and b in 0xA1u8 .. 0xFEu8:
-        let p = (uint16(l) - 0xA1) * 94 + uint16(b) - 0xA1
-        if jis0212:
-          if p < Jis0212Decode.len:
-            c = Jis0212Decode[p]
-        else:
-          if p < Jis0208Decode.len:
-            c = Jis0208Decode[p]
-      # The flag only ever applies to the pair that directly follows 0x8F.
-      jis0212 = false
-      if c != 0:
-        stream.append_codepoint c, oq, olen, n
-        continue
-      # No mapping: the lead byte is an error. An ASCII trail byte is still
-      # a character of its own and is emitted after the error.
-      stream.handleError(oq, olen, n)
-      if stream.isend: break
-      if cast[char](b) in Ascii:
-        stream.append_codepoint b, oq, olen, n
-    elif cast[char](b) in Ascii:
-      stream.append_codepoint b, oq, olen, n
-    elif b in {0x8Eu8, 0x8Fu8, 0xA1u8 .. 0xFEu8}:
-      lead = b
-    else:
-      stream.handleError(oq, olen, n)
-      if stream.isend: break
-  stream.eucjpjis0212 = jis0212
-  stream.eucjplead = lead
-
-proc decodeISO2022JP(stream: DecoderStream, iq: openArray[uint8],
-    oq: ptr UncheckedArray[uint32], olen: int, n: var int) = # ISO-2022-JP state machine; state is loaded from and stored back to `stream`
-  var i = 0
-  var lead = stream.iso2022jplead
-  var state = stream.iso2022jpstate
-  var output = stream.iso2022jpoutput
-  var outputstate = stream.iso2022jpoutputstate
-  var buf = stream.iso2022jpbuf
-  var hasbuf = stream.iso2022jphasbuf
-  while i < iq.len:
-    let b = if hasbuf: # a byte pushed back earlier is replayed before iq[i]
-      hasbuf = false
-      dec i # cancels the `inc i` at the bottom so iq[i] is not skipped
-      buf
-    else:
-      iq[i]
-    case state
-    of STATE_ASCII:
-      case b
-      of 0x1B: state = STATE_ESCAPE_START # ESC begins an escape sequence
-      of {0x00u8..0x7Fu8} - {0x0Eu8, 0x0Fu8, 0x1Bu8}:
-        output = false
-        stream.append_codepoint b, oq, olen, n
-      else:
-        output = false
-        stream.handleError(oq, olen, n)
-        if stream.isend: break
-    of STATE_ROMAN: # like ASCII, except 0x5C and 0x7E map to U+00A5 and U+203E
-      case b
-      of 0x1B: state = STATE_ESCAPE_START
-      of 0x5C:
-        output = false
-        stream.append_codepoint 0x00A5, oq, olen, n
-      of 0x7E:
-        output = false
-        stream.append_codepoint 0x203E, oq, olen, n
-      of {0x00u8..0x7Fu8} - {0x0Eu8, 0x0Fu8, 0x1Bu8, 0x5Cu8, 0x7Eu8}:
-        output = false
-        stream.append_codepoint b, oq, olen, n
-      else:
-        output = false
-        stream.handleError(oq, olen, n)
-        if stream.isend: break
-    of STATE_KATAKANA:
-      case b
-      of 0x1B: state = STATE_ESCAPE_START
-      of 0x21u8..0x5Fu8:
-        output = false
-        stream.append_codepoint 0xFF61u16 - 0x21 + uint16(b), oq, olen, n # 0x21..0x5F map onto U+FF61..U+FF9F
-      else:
-        output = false
-        stream.handleError(oq, olen, n)
-        if stream.isend: break
-    of STATE_LEAD_BYTE: # first byte of a two-byte character
-      case b
-      of 0x1B: state = STATE_ESCAPE_START
-      of 0x21u8..0x7Eu8:
-        output = false
-        lead = b
-        state = STATE_TRAIL_BYTE
-      else:
-        output = false
-        stream.handleError(oq, olen, n)
-        if stream.isend: break
-    of STATE_TRAIL_BYTE: # second byte of a two-byte character
-      case b
-      of 0x1B:
-        state = STATE_ESCAPE_START
-        stream.handleError(oq, olen, n) # the pending lead byte is lost
-        if stream.isend: break
-      of 0x21u8..0x7Eu8:
-        state = STATE_LEAD_BYTE
-        let p = (uint16(lead) - 0x21) * 94 + uint16(b) - 0x21 # index into Jis0208Decode
-        if p < Jis0208Decode.len:
-          let c = Jis0208Decode[p]
-          if c != 0:
-            stream.append_codepoint c, oq, olen, n
-          else:
-            stream.handleError(oq, olen, n)
-            if stream.isend: break
-      else:
-        state = STATE_LEAD_BYTE
-        stream.handleError(oq, olen, n)
-        if stream.isend: break
-    of STATE_ESCAPE_START: # byte directly after ESC
-      if b == 0x24 or b == 0x28:
-        lead = b
-        state = STATE_ESCAPE
-      else:
-        output = false
-        state = outputstate # fall back to the state in effect before ESC
-        stream.handleError(oq, olen, n)
-        if stream.isend: break
-        continue # prepend (no inc i)
-    of STATE_ESCAPE: # final byte of an escape sequence; `lead` holds the byte after ESC
-      let l = lead
-      lead = 0
-      block statenonnull: # left early when the sequence selects no state
-        var s: ISO2022JPState
-        if l == 0x28:
-          case b
-          of 0x42: s = STATE_ASCII
-          of 0x4A: s = STATE_ROMAN
-          of 0x49: s = STATE_KATAKANA
-          else: break statenonnull
-        elif l == 0x24 and b in {0x40u8, 0x42u8}:
-          s = STATE_LEAD_BYTE
-        else: break statenonnull
-        state = s
-        outputstate = s
-        if output: # previous escape was not followed by any other input: report an error
-          stream.handleError(oq, olen, n)
-          if stream.isend:
-            break
-        output = true
-        inc i
-        continue
-      output = false # rejected sequence: report it and replay `l` followed by b
-      state = outputstate
-      stream.handleError(oq, olen, n)
-      if stream.isend: break
-      hasbuf = true
-      buf = l
-      continue # prepend (no inc i)
-    inc i
-  stream.iso2022jphasbuf = hasbuf
-  stream.iso2022jpbuf = buf
-  stream.iso2022jplead = lead
-  stream.iso2022jpstate = state
-  stream.iso2022jpoutput = output
-  stream.iso2022jpoutputstate = outputstate
-
-proc decodeShiftJIS(stream: DecoderStream, iq: openArray[uint8],
-    oq: ptr UncheckedArray[uint32], olen: int, n: var int) =
-  ## Decodes Shift_JIS bytes from `iq`, emitting code points through
-  ## `append_codepoint`. A pending lead byte is carried across calls in
-  ## `stream.sjislead`. Invalid input goes through `handleError`; in fatal
-  ## mode decoding stops at the first error.
-  var pending = stream.sjislead
-  var pos = 0
-  while pos < iq.len:
-    let b = iq[pos]
-    if pending != 0:
-      let trailOffset = if b < 0x7Fu8: 0x40u16 else: 0x41u16
-      let leadOffset = if pending < 0xA0: 0x81u16 else: 0xC1u16
-      # A pointer only exists when the trail byte is in one of two ranges.
-      let hasPointer = b in 0x40u8..0x7Eu8 or b in 0x80u8..0xFCu8
-      let p =
-        if hasPointer:
-          (uint16(pending) - leadOffset) * 188 + uint16(b) - trailOffset
-        else:
-          0u16
-      pending = 0
-      if hasPointer and p in 8836u16..10715u16:
-        # This pointer range maps linearly onto code points from U+E000.
-        stream.append_codepoint 0xE000u16 - 8836 + p, oq, olen, n
-      elif hasPointer and p < Jis0208Decode.len and Jis0208Decode[p] != 0:
-        stream.append_codepoint Jis0208Decode[p], oq, olen, n
-      else:
-        stream.handleError(oq, olen, n)
-        if stream.isend: break
-        if cast[char](b) in Ascii:
-          # re-examine the ASCII byte on its own (pos is not advanced)
-          continue
-    elif cast[char](b) in Ascii or b == 0x80:
-      stream.append_codepoint b, oq, olen, n
-    elif b in 0xA1u8..0xDFu8:
-      # single-byte half-width katakana
-      stream.append_codepoint 0xFF61u16 - 0xA1 + uint16(b), oq, olen, n
-    elif b in {0x81..0x9F} + {0xE0..0xFC}:
-      pending = b
-    else:
-      stream.handleError(oq, olen, n)
-      if stream.isend: break
-    inc pos
-  stream.sjislead = pending
-
-proc decodeEUCKR(stream: DecoderStream, iq: openArray[uint8],
-    oq: ptr UncheckedArray[uint32], olen: int, n: var int) =
-  ## Decodes EUC-KR bytes from `iq`, emitting code points through
-  ## `append_codepoint`. A pending lead byte is carried across calls in
-  ## `stream.euckrlead`. Invalid input goes through `handleError`; in fatal
-  ## mode decoding stops at the first error.
-  var lead = stream.euckrlead
-  for b in iq:
-    if lead != 0:
-      # The lead byte is consumed by this byte whether or not the pair
-      # decodes; it must not stay pending for the bytes that follow.
-      let l = lead
-      lead = 0
-      if b in 0x41u8..0xFEu8:
-        let p = (uint16(l) - 0x81) * 190 + (uint16(b) - 0x41)
-        if p < EUCKRDecode.len and EUCKRDecode[p] != 0:
-          let c = EUCKRDecode[p]
-          stream.append_codepoint c, oq, olen, n
-          continue
-      # No mapping: the lead byte is an error. An ASCII trail byte is still
-      # a character of its own and is emitted after the error.
-      stream.handleError(oq, olen, n)
-      if stream.isend: break
-      if cast[char](b) in Ascii:
-        stream.append_codepoint b, oq, olen, n
-    elif cast[char](b) in Ascii:
-      stream.append_codepoint b, oq, olen, n
-    elif b in {0x81u8..0xFEu8}:
-      lead = b
-    else:
-      stream.handleError(oq, olen, n)
-      if stream.isend: break
-  stream.euckrlead = lead
-
-proc decodeUTF16(stream: DecoderStream, iq: openArray[uint8],
-    oq: ptr UncheckedArray[uint32], olen: int, n: var int, be: static bool) =
-  ## Decodes UTF-16 bytes from `iq` (big endian when `be` is true, little
-  ## endian otherwise), emitting code points through `append_codepoint`.
-  ## A half-read code unit and a pending lead surrogate are carried across
-  ## calls in `stream`. Unpaired surrogates go through `handleError`; in
-  ## fatal mode decoding stops at the first error.
-  var i = 0
-  var lead = stream.u16lead
-  var haslead = stream.u16haslead
-  var surr = stream.u16surr
-  var hassurr = stream.u16hassurr
-  while i < iq.len:
-    if not haslead:
-      # first byte of a code unit
-      haslead = true
-      lead = iq[i]
-    else:
-      let cu = if be:
-        (uint16(lead) shl 8) + uint16(iq[i])
-      else:
-        (uint16(iq[i]) shl 8) + uint16(lead)
-      haslead = false
-      if hassurr:
-        hassurr = false
-        if cu in 0xDC00u16 .. 0xDFFFu16:
-          let c = 0x10000 + ((uint32(surr) - 0xD800) shl 10) + (uint32(cu) - 0xDC00)
-          stream.append_codepoint c, oq, olen, n
-          inc i
-          continue
-        # Lead surrogate without a trail surrogate: report it, then decode
-        # the current code unit again on its own. `lead` is kept and `i` is
-        # not advanced, so the same unit is rebuilt on the next iteration.
-        haslead = true # prepend the last two bytes
-        stream.handleError(oq, olen, n)
-        if stream.isend: # fatal error
-          break
-        continue
-      if cu in 0xD800u16 .. 0xDBFFu16:
-        surr = cu
-        hassurr = true
-        inc i
-        continue
-      elif cu in 0xDC00u16 .. 0xDFFFu16:
-        # trail surrogate without a preceding lead surrogate
-        stream.handleError(oq, olen, n)
-        if stream.isend: # fatal error
-          break
-        else:
-          inc i
-          continue
-      stream.append_codepoint uint32(cu), oq, olen, n
-    inc i
-  stream.u16lead = lead
-  stream.u16haslead = haslead
-  stream.u16surr = surr
-  stream.u16hassurr = hassurr
-
-proc decodeUTF16LE(stream: DecoderStream, iq: openArray[uint8],
-    oq: ptr UncheckedArray[uint32], olen: int, n: var int) =
-  ## Little-endian entry point for `decodeUTF16`.
-  decodeUTF16(stream, iq, oq, olen, n, be = false)
-
-proc decodeUTF16BE(stream: DecoderStream, iq: openArray[uint8],
-    oq: ptr UncheckedArray[uint32], olen: int, n: var int) =
-  ## Big-endian entry point for `decodeUTF16`.
-  decodeUTF16(stream, iq, oq, olen, n, be = true)
-
-proc decodeXUserDefined(stream: DecoderStream, iq: openArray[uint8],
-    oq: ptr UncheckedArray[uint32], olen: int, n: var int) =
-  ## Decodes x-user-defined bytes from `iq`: bytes in `Ascii` are emitted
-  ## unchanged, every other byte `b` is emitted as 0xF780 + b - 0x80.
-  for b in iq:
-    if cast[char](b) in Ascii:
-      stream.append_codepoint cast[char](b), oq, olen, n
-    else:
-      # 0xF780 + b - 0x80 == 0xF700 + b
-      stream.append_codepoint 0xF700u32 + uint32(b), oq, olen, n
-
-proc decodeSingleByte(stream: DecoderStream, iq: openArray[uint8],
-    oq: ptr UncheckedArray[uint32], olen: int, n: var int,
-    map: array[char, uint16]) =
-  ## Decodes a single-byte encoding: bytes in `Ascii` are emitted unchanged,
-  ## every other byte is looked up in `map` at index `byte - 0x80`. A zero
-  ## entry means "no mapping" and goes through `handleError`; in fatal mode
-  ## decoding stops at the first error, like in the multi-byte decoders.
-  for b in iq:
-    let c = cast[char](b)
-    if c in Ascii:
-      stream.append_codepoint c, oq, olen, n
-    else:
-      let p = map[cast[char](b - 0x80)]
-      if p == 0u16:
-        stream.handleError(oq, olen, n)
-        if stream.isend: break
-      else:
-        stream.append_codepoint uint32(p), oq, olen, n
-
-proc decodeReplacement(stream: DecoderStream, oq: ptr UncheckedArray[uint32],
-    olen: int, n: var int) =
-  ## Decoder for the "replacement" charset: the input is ignored and a
-  ## single error is reported, on the first call only.
-  if stream.replreported:
-    return
-  stream.replreported = true
-  stream.handleError(oq, olen, n)
-
-# copy any data remaining from previous passes
-proc copyBuffers(stream: DecoderStream, oq: ptr UncheckedArray[uint32], olen: int): int = # copies buffered code points into oq; olen and the result are byte counts
-  if stream.bufs.len == 1:
-    # one page: stream.bs ..< stream.bi
-    let n = min((stream.bi - stream.bs) * sizeof(stream.bufs[0][0]), olen) # bytes to copy, capped by the output size
-    copyMem(addr oq[0], addr stream.bufs[0][stream.bs], n)
-    stream.bs += n div sizeof(uint32)
-    if stream.bs >= stream.bi:
-      # read entire page; recycle it
-      stream.bs = 0
-      stream.bi = 0
-    return n
-  else:
-    # multiple pages:
-    # stream.bs ..< stream.buflen
-    # 0 ..< stream.buflen
-    # ...
-    # 0 ..< stream.bi
-    let a = (stream.buflen - stream.bs) * sizeof(stream.bufs[0][0]) # bytes left in the first page
-    if a < olen: # the first page fits with room to spare: continue into later pages
-      copyMem(addr oq[0], addr stream.bufs[0][stream.bs], a)
-      var ns = a # bytes copied so far
-      stream.bs = 0
-      var i = 1 # index of the page being copied
-      while i < stream.bufs.high: # full pages between the first and the last
-        let n = min(stream.buflen * sizeof(stream.bufs[0][0]), olen - ns)
-        copyMem(addr oq[ns div sizeof(uint32)], addr stream.bufs[i][0], n)
-        ns += n
-        if ns >= olen: # NOTE(review): if n was a whole page, bs becomes buflen here - confirm the next call copes
-          # i'th buffer still has contents.
-          stream.bs = n div sizeof(uint32)
-          break
-        stream.bs = 0
-        inc i
-      if ns < olen:
-        # last page
-        let n = min(stream.bi * sizeof(stream.bufs[0][0]), olen - ns)
-        copyMem(addr oq[ns div sizeof(uint32)], addr stream.bufs[i][0], n)
-        ns += n
-        stream.bs = n div sizeof(uint32)
-        if stream.bs >= stream.bi:
-          # read entire page; recycle it
-          stream.bs = 0
-          stream.bi = 0
-      for j in i ..< stream.bufs.len: # drop the i pages before page i by shifting the rest down
-        stream.bufs[j - i] = stream.bufs[j]
-      stream.bufs.setLen(stream.bufs.len - i)
-      return ns
-    elif a > olen: # the output is smaller than what is left in the first page
-      copyMem(addr oq[0], addr stream.bufs[0][stream.bs], olen)
-      stream.bs += olen div sizeof(uint32)
-      assert stream.bs < stream.buflen
-      return olen
-    else: # a == olen
-      copyMem(addr oq[0], addr stream.bufs[0][stream.bs], a)
-      stream.bs = 0
-      stream.bufs.delete(0) # first page fully consumed
-      return a
-
-proc checkEnd(stream: DecoderStream, oq: ptr UncheckedArray[uint32], olen: int,
-              n: var int) =
-  ## Marks the stream as ended once there is a single, fully drained overflow
-  ## page and the source reports `atEnd`. If the decoder is then still in the
-  ## middle of a sequence, the truncated input is reported via `handleError`.
-  if stream.isend:
-    return
-  if stream.bufs.len != 1 or stream.bs < stream.bi:
-    return
-  if not stream.source.atEnd:
-    return
-  stream.isend = true
-  case stream.charset
-  of CHARSET_UTF_8:
-    if stream.u8needed != 0:
-      stream.handleError(oq, olen, n)
-  of CHARSET_UTF_16_LE, CHARSET_UTF_16_BE:
-    if stream.u16haslead or stream.u16hassurr:
-      stream.handleError(oq, olen, n)
-  of CHARSET_GB18030, CHARSET_GBK:
-    if stream.gb18first != 0 or stream.gb18second != 0 or stream.gb18third != 0:
-      stream.handleError(oq, olen, n)
-  of CHARSET_BIG5:
-    if stream.big5lead != 0:
-      stream.handleError(oq, olen, n)
-  of CHARSET_EUC_JP:
-    if stream.eucjplead != 0:
-      stream.handleError(oq, olen, n)
-  of CHARSET_SHIFT_JIS:
-    if stream.sjislead != 0:
-      stream.handleError(oq, olen, n)
-  of CHARSET_EUC_KR:
-    if stream.euckrlead != 0:
-      stream.handleError(oq, olen, n)
-  of CHARSET_ISO_2022_JP:
-    case stream.iso2022jpstate
-    of STATE_TRAIL_BYTE, STATE_ESCAPE_START:
-      stream.handleError(oq, olen, n)
-    of STATE_ESCAPE:
-      # Unfinished escape sequence: un-end the stream, queue the byte that
-      # followed ESC for replay and return to the previous output state.
-      stream.isend = false
-      stream.iso2022jpbuf = stream.iso2022jplead
-      stream.iso2022jphasbuf = true
-      stream.iso2022jplead = 0
-      stream.iso2022jpoutput = false
-      stream.iso2022jpstate = stream.iso2022jpoutputstate
-      stream.handleError(oq, olen, n)
-    of STATE_ASCII, STATE_ROMAN, STATE_KATAKANA, STATE_LEAD_BYTE:
-      discard
-  else:
-    discard
-
-proc prepend*(stream: DecoderStream, c: uint32) =
-  ## Stores `c` in the stream's overflow pages via `append_codepoint_buf`,
-  ## from where `readData` hands it out.
-  append_codepoint_buf(stream, c)
-
-const ReadSize = 4096
-proc readData*(stream: DecoderStream, buffer: pointer, olen: int): int =
-  ## Fills `buffer` (capacity `olen` bytes, a multiple of 4) with decoded
-  ## code points as uint32 values and returns the number of bytes written.
-  ##
-  ## Code points left over from earlier calls are handed out first. If there
-  ## is still room and the source is not at its end, up to `ReadSize` bytes
-  ## are read from the source and decoded; whatever does not fit in `buffer`
-  ## is kept in the stream's overflow pages for the next call.
-  const l = sizeof(stream.bufs[0][0])
-  assert olen mod l == 0, "Buffer size must be divisible by " & $l
-  if olen == 0: return
-  let oq = cast[ptr UncheckedArray[uint32]](buffer)
-  result = stream.copyBuffers(oq, olen)
-  # `result` is the absolute byte offset into `oq` from here on, so the
-  # decoders must be given the full capacity `olen`: append_codepoint
-  # compares the offset against it and indexes `oq` from the start.
-  if result >= olen or stream.source.atEnd:
-    # either output filled with buffered data; nothing to decode
-    # or we're at the end of the source stream
-    stream.checkEnd(oq, olen, result)
-    return result
-  var iq = newSeqUninitialized[uint8](ReadSize)
-  let ilen = stream.source.readData(cast[pointer](addr iq[0]), ReadSize)
-  if ilen == 0:
-    stream.checkEnd(oq, olen, result)
-    return result
-  template iqoa: openArray[uint8] = toOpenArray(iq, 0, ilen - 1)
-  case stream.charset
-  of CHARSET_UTF_8:
-    stream.decodeUTF8(iqoa, oq, olen, result)
-  of CHARSET_IBM866:
-    stream.decodeSingleByte(iqoa, oq, olen, result, IBM866Decode)
-  of CHARSET_ISO_8859_2:
-    stream.decodeSingleByte(iqoa, oq, olen, result, ISO88592Decode)
-  of CHARSET_ISO_8859_3:
-    stream.decodeSingleByte(iqoa, oq, olen, result, ISO88593Decode)
-  of CHARSET_ISO_8859_4:
-    stream.decodeSingleByte(iqoa, oq, olen, result, ISO88594Decode)
-  of CHARSET_ISO_8859_5:
-    stream.decodeSingleByte(iqoa, oq, olen, result, ISO88595Decode)
-  of CHARSET_ISO_8859_6:
-    stream.decodeSingleByte(iqoa, oq, olen, result, ISO88596Decode)
-  of CHARSET_ISO_8859_7:
-    stream.decodeSingleByte(iqoa, oq, olen, result, ISO88597Decode)
-  of CHARSET_ISO_8859_8,
-     CHARSET_ISO_8859_8_I:
-    stream.decodeSingleByte(iqoa, oq, olen, result, ISO88598Decode)
-  of CHARSET_ISO_8859_10:
-    stream.decodeSingleByte(iqoa, oq, olen, result, ISO885910Decode)
-  of CHARSET_ISO_8859_13:
-    stream.decodeSingleByte(iqoa, oq, olen, result, ISO885913Decode)
-  of CHARSET_ISO_8859_14:
-    stream.decodeSingleByte(iqoa, oq, olen, result, ISO885914Decode)
-  of CHARSET_ISO_8859_15:
-    stream.decodeSingleByte(iqoa, oq, olen, result, ISO885915Decode)
-  of CHARSET_ISO_8859_16:
-    stream.decodeSingleByte(iqoa, oq, olen, result, ISO885916Decode)
-  of CHARSET_KOI8_R:
-    stream.decodeSingleByte(iqoa, oq, olen, result, KOI8RDecode)
-  of CHARSET_KOI8_U:
-    stream.decodeSingleByte(iqoa, oq, olen, result, KOI8UDecode)
-  of CHARSET_MACINTOSH:
-    stream.decodeSingleByte(iqoa, oq, olen, result, MacintoshDecode)
-  of CHARSET_WINDOWS_874:
-    stream.decodeSingleByte(iqoa, oq, olen, result, Windows874Decode)
-  of CHARSET_WINDOWS_1250:
-    stream.decodeSingleByte(iqoa, oq, olen, result, Windows1250Decode)
-  of CHARSET_WINDOWS_1251:
-    stream.decodeSingleByte(iqoa, oq, olen, result, Windows1251Decode)
-  of CHARSET_WINDOWS_1252:
-    stream.decodeSingleByte(iqoa, oq, olen, result, Windows1252Decode)
-  of CHARSET_WINDOWS_1253:
-    stream.decodeSingleByte(iqoa, oq, olen, result, Windows1253Decode)
-  of CHARSET_WINDOWS_1254:
-    stream.decodeSingleByte(iqoa, oq, olen, result, Windows1254Decode)
-  of CHARSET_WINDOWS_1255:
-    stream.decodeSingleByte(iqoa, oq, olen, result, Windows1255Decode)
-  of CHARSET_WINDOWS_1256:
-    stream.decodeSingleByte(iqoa, oq, olen, result, Windows1256Decode)
-  of CHARSET_WINDOWS_1257:
-    stream.decodeSingleByte(iqoa, oq, olen, result, Windows1257Decode)
-  of CHARSET_WINDOWS_1258:
-    stream.decodeSingleByte(iqoa, oq, olen, result, Windows1258Decode)
-  of CHARSET_X_MAC_CYRILLIC:
-    stream.decodeSingleByte(iqoa, oq, olen, result, XMacCyrillicDecode)
-  of CHARSET_GBK, CHARSET_GB18030:
-    stream.decodeGb18030(iqoa, oq, olen, result)
-  of CHARSET_BIG5:
-    stream.decodeBig5(iqoa, oq, olen, result)
-  of CHARSET_EUC_JP:
-    stream.decodeEUCJP(iqoa, oq, olen, result)
-  of CHARSET_ISO_2022_JP:
-    stream.decodeISO2022JP(iqoa, oq, olen, result)
-  of CHARSET_SHIFT_JIS:
-    stream.decodeShiftJIS(iqoa, oq, olen, result)
-  of CHARSET_EUC_KR:
-    stream.decodeEUCKR(iqoa, oq, olen, result)
-  of CHARSET_REPLACEMENT:
-    stream.decodeReplacement(oq, olen, result)
-  of CHARSET_UTF_16_LE:
-    stream.decodeUTF16LE(iqoa, oq, olen, result)
-  of CHARSET_UTF_16_BE:
-    stream.decodeUTF16BE(iqoa, oq, olen, result)
-  of CHARSET_X_USER_DEFINED:
-    stream.decodeXUserDefined(iqoa, oq, olen, result)
-  of CHARSET_UNKNOWN:
-    doAssert false, "Somebody forgot to set the character set here"
-  stream.checkEnd(oq, olen, result)
-
-# Returns the number of bytes read.
-proc readData*(stream: DecoderStream, buf: var openarray[uint32]): int =
-  ## Decodes into `buf` and returns the number of BYTES written (divide by
-  ## `sizeof(uint32)` for the number of code points). An empty `buf` yields
-  ## 0, like the pointer overload does for `olen == 0`.
-  if buf.len == 0:
-    # `addr buf[0]` would raise on an empty buffer
-    return 0
-  return stream.readData(addr buf[0], buf.len * sizeof(buf[0]))
-
-proc atEnd*(stream: DecoderStream): bool =
-  ## True once the stream has been marked as ended (source exhausted and
-  ## drained, or a fatal decoding error).
-  stream.isend
-
-# Read all and convert to UTF-8.
-# Probably not very efficient. Oh well.
-proc readAll*(stream: DecoderStream): string =
-  ## Drains the stream and returns everything it decodes as one string,
-  ## converting each code point with `$` on `Rune`.
-  var page = newSeqUninitialized[uint32](stream.buflen)
-  while not stream.atEnd:
-    # readData reports bytes; each code point occupies four of them.
-    let nbytes = stream.readData(page)
-    let count = nbytes div 4
-    var k = 0
-    while k < count:
-      result.add $cast[Rune](page[k])
-      inc k
-
-proc newDecoderStream*(source: Stream, cs = CHARSET_UTF_8, buflen = 1024,
-                       errormode = DECODER_ERROR_MODE_REPLACEMENT): DecoderStream =
-  ## Creates a decoder that reads bytes in charset `cs` from `source`.
-  ## `buflen` is the size of one overflow page in code points; `errormode`
-  ## selects how undecodable input is handled.
-  let stream = DecoderStream(
-    source: source,
-    charset: cs,
-    buflen: buflen,
-    errormode: errormode
-  )
-  # The first overflow page always exists; the compile-time VM gets a
-  # zero-initialised one.
-  when nimvm:
-    stream.bufs = @[newSeq[uint32](buflen)]
-  else:
-    stream.bufs = @[newSeqUninitialized[uint32](buflen)]
-  if cs == CHARSET_UTF_8:
-    # default range for continuation bytes
-    stream.u8bounds = 0x80u8 .. 0xBFu8
-  return stream