about summary refs log tree commit diff stats
diff options
context:
space:
mode:
author bptato <nincsnevem662@gmail.com> 2023-07-12 00:05:14 +0200
committer bptato <nincsnevem662@gmail.com> 2023-07-12 00:13:59 +0200
commit af3c8348a096b80a22d9463c516a932689a4836c (patch)
tree f340768cd666ed56e9fbf87e3a03b01e4e6d6d04
parent f150a706cfbe07ba0ebbfb6fdd904ff454ad7c60 (diff)
download chawan-af3c8348a096b80a22d9463c516a932689a4836c.tar.gz
Improve encoding support
* Use the output charset in lineedit (as w3m does)
* encoder: fix broken UTF-8 encoding, use openArray instead of var
  seq for input queue
* Add RuneStream as an in-memory interface to EncoderStream
* Document display-charset config option
-rw-r--r-- doc/config.md 24
-rw-r--r-- src/display/client.nim 4
-rw-r--r-- src/display/term.nim 87
-rw-r--r-- src/encoding/decoderstream.nim 2
-rw-r--r-- src/encoding/encoderstream.nim 207
-rw-r--r-- src/io/lineedit.nim 65
-rw-r--r-- src/io/runestream.nim 31
7 files changed, 259 insertions, 161 deletions
diff --git a/doc/config.md b/doc/config.md
index 6662d1be..443d7003 100644
--- a/doc/config.md
+++ b/doc/config.md
@@ -111,21 +111,23 @@ Following is a list of encoding options:
 </tr>
 
 <tr>
-<td>fallback-charset</td>
+<td>document-charset</td>
 <td>string/array</td>
-<td>Default character set for loading documents.<br>
-For text
-documents, all listed character sets are enumerated until the document has been
-decoded without errors.</td>
+<td>List of character sets for loading documents.<br>
+All listed character sets are enumerated until the document has been decoded
+without errors. In HTML, meta tags and the BOM may override this with a
+different charset, so long as the specified charset can decode the document
+correctly.
+</td>
 </tr>
 
 <tr>
-<td>document-charset</td>
-<td>string/array</td>
-<td>List of forced character sets for loading documents.<br>
-All listed character sets are enumerated until the document has been decoded
-without errors.<br>
-Overrides fallback-charset.</td>
+<td>display-charset</td>
+<td>string</td>
+<td>Character set for keyboard input and displaying documents.<br>
+Used in dump mode as well.<br>
+(This means that e.g. `cha -I EUC-JP -O UTF-8 a > b` is equivalent to
+`iconv -f EUC-JP -t UTF-8 a > b`.)</td>
 </tr>
 
 </table>
diff --git a/src/display/client.nim b/src/display/client.nim
index aa36cdac..a292bf0a 100644
--- a/src/display/client.nim
+++ b/src/display/client.nim
@@ -196,12 +196,12 @@ proc input(client: Client) =
       client.line = edit
       if edit.escNext:
         edit.escNext = false
-        if edit.write(client.s):
+        if edit.write(client.s, client.pager.term.cs):
           client.s = ""
       else:
         let action = getLinedAction(client.config, client.s)
         if action == "":
-          if edit.write(client.s):
+          if edit.write(client.s, client.pager.term.cs):
             client.s = ""
           else:
             client.feedNext = true
diff --git a/src/display/term.nim b/src/display/term.nim
index 4334123d..0aa7daa9 100644
--- a/src/display/term.nim
+++ b/src/display/term.nim
@@ -10,6 +10,7 @@ import buffer/cell
 import config/config
 import data/charset
 import encoding/encoderstream
+import io/runestream
 import io/window
 import types/color
 import utils/opt
@@ -42,7 +43,7 @@ type
 
   Terminal* = ref TerminalObj
   TerminalObj = object
-    cs: Charset
+    cs*: Charset
     config: Config
     infile: File
     outfile: File
@@ -367,37 +368,65 @@ proc setTitle*(term: Terminal, title: string) =
   if term.set_title:
     term.outfile.write(XTERM_TITLE(title))
 
+# Format with a blue foreground; processOutputString0 switches to it when it
+# starts emitting control characters (only when colorctrl is enabled).
+const colorFormat = block:
+  var format = newFormat()
+  format.fgcolor = cellColor(ANSI_BLUE)
+  format
+
+# Unmodified newFormat(); processOutputString0 switches back to it once a
+# non-control character follows a control character.
+const defaultFormat = newFormat()
+
+template processOutputString0*(term: Terminal, str: iterable[Rune],
+    colorctrl: static bool, w: var int): string =
+  ## Converts the runes yielded by `str` into a string encoded in `term.cs`.
+  ##
+  ## Each rune for which `isControlChar()` is true is replaced by '^' followed
+  ## by its `getControlLetter()`. When `colorctrl` is true, the output of
+  ## `term.processFormat` is spliced in when a run of control characters
+  ## starts (colorFormat) and when it is followed by a normal rune
+  ## (defaultFormat). `w` is increased by `r.width()` for every input rune.
+  ## The collected runes are then passed through a RuneStream into an
+  ## EncoderStream in fatal error mode, and the template evaluates to the
+  ## encoder's `readAll()` result.
+  var rs0: seq[Rune]
+  var ctrl = false
+  var format = newFormat()
+  # presumably here to silence "unused" hints when colorctrl is false and the
+  # `when` branches below are compiled out -- TODO confirm
+  discard ctrl
+  discard format
+  for r in str:
+    if r.isControlChar():
+      when colorctrl:
+        # only switch formats on the first control character of a run
+        if not ctrl:
+          rs0 &= term.processFormat(format, colorFormat).toRunes()
+      rs0 &= Rune('^')
+      rs0 &= Rune(cast[char](r).getControlLetter())
+    else:
+      when colorctrl:
+        # previous rune was a control character: restore the default format
+        if ctrl:
+          rs0 &= term.processFormat(format, defaultFormat).toRunes()
+      rs0 &= r
+    ctrl = r.isControlChar()
+    # twidth wouldn't work here, the view may start at the nth character.
+    # pager must ensure tabs are converted beforehand.
+    w += r.width()
+  # NOTE(review): if the input ends with a control character, no switch back
+  # to defaultFormat is emitted by this loop.
+  # Reinterpret the runes as uint32 values so that they can be fed to the
+  # encoder through an in-memory RuneStream.
+  let ss = newRuneStream(toOpenArray(cast[seq[uint32]](rs0), 0, rs0.high))
+  let es = newEncoderStream(ss, term.cs, errormode = ENCODER_ERROR_MODE_FATAL)
+  es.readAll()
+
 proc processOutputString*(term: Terminal, str: string, w: var int): string =
   if str.validateUtf8() != -1:
     return "?"
-  if term.cs != CHARSET_UTF_8:
-    #TODO: This is incredibly inefficient.
-    var u32buf = ""
-    for r in str.runes():
-      let tw = r.width()
-      if r.isControlChar():
-        u32buf &= char(0) & char(0) & char(0) & "^" &
-          char(0) & char(0) & char(0) & getControlLetter(char(r))
-      elif tw != 0:
-        let ol = u32buf.len
-        u32buf.setLen(ol + sizeof(uint32))
-        var u32 = cast[uint32](r)
-        copyMem(addr u32buf[ol], addr u32, sizeof(u32))
-      w += tw
-    let ss = newStringStream(u32buf)
-    let encoder = newEncoderStream(ss, cs = term.cs,
-      errormode = ENCODER_ERROR_MODE_FATAL)
-    result &= encoder.readAll()
-  else:
-    for r in str.runes():
-      # twidth wouldn't work here, the view may start at the nth character.
-      # pager must ensure tabs are converted beforehand.
-      let tw = r.width()
-      if r.isControlChar():
-        result &= "^" & getControlLetter(char(r))
-      elif tw != 0:
-        result &= r
-      w += tw
+  if term.cs == CHARSET_UTF_8:
+    # optimized common case
+    block notfound:
+      for c in str:
+        if c in Controls:
+          break notfound
+      # No control characters, and the output encoding matches the internal
+      # representation.
+      w += str.width()
+      return str
+    var s = ""
+    for c in str:
+      if c in Controls:
+        s &= '^'
+        s &= c.getControlLetter()
+      else:
+        s &= c
+      # no twidth, see above
+      w += Rune(c).width()
+    return s
+  # Output is not utf-8, so we must convert back to utf-32 and then encode.
+  return term.processOutputString0(str.runes, false, w)
 
 proc generateFullOutput(term: Terminal, grid: FixedGrid): string =
   var format = newFormat()
diff --git a/src/encoding/decoderstream.nim b/src/encoding/decoderstream.nim
index ad0138ca..22702385 100644
--- a/src/encoding/decoderstream.nim
+++ b/src/encoding/decoderstream.nim
@@ -806,7 +806,7 @@ proc readData*(stream: DecoderStream, buffer: pointer, olen: int): int =
   stream.checkEnd(oq, olen, result)
 
 # Returns the number of bytes read.
-proc readData*(stream: DecoderStream, buf: var seq[uint32]): int =
+proc readData*(stream: DecoderStream, buf: var openarray[uint32]): int =
   return stream.readData(addr buf[0], buf.len * sizeof(buf[0]))
 
 proc atEnd*(stream: DecoderStream): bool =
diff --git a/src/encoding/encoderstream.nim b/src/encoding/encoderstream.nim
index 397d43d8..45911579 100644
--- a/src/encoding/encoderstream.nim
+++ b/src/encoding/encoderstream.nim
@@ -104,36 +104,36 @@ proc gb18030RangesPointer(c: uint32): uint32 =
     p = elem.p
   return p + c - offset
 
-proc encodeUTF8(stream: EncoderStream, iq: var seq[uint32],
-    oq: ptr UncheckedArray[uint8], ilen, olen: int, n: var int) =
-  var i = 0
-  while i < ilen:
-    let c = iq[i]
-    var count: int
-    var offset: uint8
-    case c
-    of 0x0080..0x07FF:
-      count = 1
-      offset = 0xC0
-    of 0x0800..0xFFFF:
-      count = 2
-      offset = 0xE0
-    of 0x10000..0x10FFFF:
-      count = 3
-      offset = 0xF0
+proc encodeUTF8(stream: EncoderStream, iq: openArray[uint32],
+    oq: ptr UncheckedArray[uint8], olen: int, n: var int) =
+  for c in iq:
+    if c < 0x80:
+      stream.append_byte c, oq, olen, n
     else:
-      assert false
-    stream.append_byte (c shr (6 * count)) + offset, oq, olen, n
-    while count > 0:
-      let tmp = c shr (6 * (count - 1))
-      stream.append_byte 0x80 or (tmp and 0x3F), oq, olen, n
-      dec count
+      var count: int
+      var offset: uint8
+      case c
+      of 0x80..0x7FF:
+        count = 1
+        offset = 0xC0
+      of 0x800..0xFFFF:
+        count = 2
+        offset = 0xE0
+      of 0x10000..0x10FFFF:
+        count = 3
+        offset = 0xF0
+      else:
+        assert false
+        {.linearScanEnd.}
+      stream.append_byte (c shr (6 * count)) + offset, oq, olen, n
+      for j in countdown(count - 1, 0):
+        let tmp = c shr (6 * j)
+        stream.append_byte 0x80 or (tmp and 0x3F), oq, olen, n
 
-proc encodeSingleByte(stream: EncoderStream, iq: var seq[uint32],
-    oq: ptr UncheckedArray[uint8], ilen, olen: int, n: var int,
+proc encodeSingleByte(stream: EncoderStream, iq: openArray[uint32],
+    oq: ptr UncheckedArray[uint8], olen: int, n: var int,
     map: seq[tuple[ucs: uint16, val: char]]) =
-  for i in 0 ..< ilen:
-    let c = iq[i]
+  for c in iq:
     if c < 0x80:
       stream.append_byte cast[uint8](c), oq, olen, n
       continue
@@ -143,10 +143,9 @@ proc encodeSingleByte(stream: EncoderStream, iq: var seq[uint32],
     else:
       stream.handleError(oq, olen, n, c)
 
-proc encodeXUserDefined(stream: EncoderStream, iq: var seq[uint32],
-    oq: ptr UncheckedArray[uint8], ilen, olen: int, n: var int) =
-  for i in 0 ..< ilen:
-    let c = iq[i]
+proc encodeXUserDefined(stream: EncoderStream, iq: openArray[uint32],
+    oq: ptr UncheckedArray[uint8], olen: int, n: var int) =
+  for c in iq:
     if c < 0x80:
       stream.append_byte cast[uint8](c), oq, olen, n
       continue
@@ -156,9 +155,8 @@ proc encodeXUserDefined(stream: EncoderStream, iq: var seq[uint32],
       continue
     stream.handleError(oq, olen, n, c)
 
-proc encodeGb18030(stream: EncoderStream, iq: var seq[uint32],
-    oq: ptr UncheckedArray[uint8], ilen, olen: int, n: var int,
-    isGBK = false) =
+proc encodeGb18030(stream: EncoderStream, iq: openArray[uint32],
+    oq: ptr UncheckedArray[uint8], olen: int, n: var int, isGBK = false) =
   for c in iq:
     if isGBK and c == 0x20AC:
       stream.append_byte 0x80, oq, olen, n
@@ -187,8 +185,8 @@ proc encodeGb18030(stream: EncoderStream, iq: var seq[uint32],
     stream.append_byte b3, oq, olen, n
     stream.append_byte b4, oq, olen, n
 
-proc encodeBig5(stream: EncoderStream, iq: var seq[uint32],
-    oq: ptr UncheckedArray[uint8], ilen, olen: int, n: var int) =
+proc encodeBig5(stream: EncoderStream, iq: openArray[uint32],
+    oq: ptr UncheckedArray[uint8], olen: int, n: var int) =
   for c in iq:
     if c < 0x80:
       stream.append_byte c, oq, olen, n
@@ -204,8 +202,8 @@ proc encodeBig5(stream: EncoderStream, iq: var seq[uint32],
     stream.append_byte lead, oq, olen, n
     stream.append_byte cast[uint8](trail) + offset, oq, olen, n
 
-proc encodeEUCJP(stream: EncoderStream, iq: var seq[uint32],
-    oq: ptr UncheckedArray[uint8], ilen, olen: int, n: var int) =
+proc encodeEUCJP(stream: EncoderStream, iq: openArray[uint32],
+    oq: ptr UncheckedArray[uint8], olen: int, n: var int) =
   for c in iq:
     if c < 0x80:
       stream.append_byte c, oq, olen, n
@@ -231,11 +229,11 @@ proc encodeEUCJP(stream: EncoderStream, iq: var seq[uint32],
       else:
         stream.handleError(oq, olen, n, c)
 
-proc encodeISO2022JP(stream: EncoderStream, iq: var seq[uint32],
-    oq: ptr UncheckedArray[uint8], ilen, olen: int, n: var int) =
+proc encodeISO2022JP(stream: EncoderStream, iq: openArray[uint32],
+    oq: ptr UncheckedArray[uint8], olen: int, n: var int) =
   var state = stream.iso2022jpstate
   var i = 0
-  while i < ilen:
+  while i < iq.len:
     let c = iq[i]
     if state in {STATE_ASCII, STATE_ROMAN} and
         c in [0x0Eu32, 0x0Fu32, 0x1Bu32]:
@@ -295,8 +293,8 @@ proc encodeISO2022JP(stream: EncoderStream, iq: var seq[uint32],
     inc i
   stream.iso2022jpstate = state
 
-proc encodeShiftJIS(stream: EncoderStream, iq: var seq[uint32],
-    oq: ptr UncheckedArray[uint8], ilen, olen: int, n: var int) =
+proc encodeShiftJIS(stream: EncoderStream, iq: openArray[uint32],
+    oq: ptr UncheckedArray[uint8], olen: int, n: var int) =
   for c in iq:
     if c <= 0x80:
       stream.append_byte c, oq, olen, n
@@ -320,8 +318,8 @@ proc encodeShiftJIS(stream: EncoderStream, iq: var seq[uint32],
         stream.append_byte lead + lead_offset, oq, olen, n
         stream.append_byte trail + offset, oq, olen, n
 
-proc encodeEUCKR(stream: EncoderStream, iq: var seq[uint32],
-    oq: ptr UncheckedArray[uint8], ilen, olen: int, n: var int) =
+proc encodeEUCKR(stream: EncoderStream, iq: openArray[uint32],
+    oq: ptr UncheckedArray[uint8], olen: int, n: var int) =
   for c in iq:
     if c < 0x80:
       stream.append_byte c, oq, olen, n
@@ -407,6 +405,7 @@ proc checkEnd(stream: EncoderStream, oq: ptr UncheckedArray[uint8], olen: int,
         stream.append_byte 0x42, oq, olen, n
 
 const ReadSize = 4096
+var iq {.threadVar.}: array[ReadSize div sizeof(uint32), uint32]
 proc readData*(stream: EncoderStream, buffer: pointer, olen: int): int =
   if olen == 0: return
   let oq = cast[ptr UncheckedArray[uint8]](buffer)
@@ -417,50 +416,88 @@ proc readData*(stream: EncoderStream, buffer: pointer, olen: int): int =
     # or we're at the end of the source stream
     stream.checkEnd(oq, olen, result)
     return result
-  var iq = newSeqUninitialized[uint32](ReadSize div sizeof(uint32))
   let ilen0 = stream.source.readData(cast[pointer](addr iq[0]), ReadSize)
+  #TODO what if ilen0 is 0?
+  assert ilen0 != 0
   assert ilen0 mod sizeof(uint32) == 0 #TODO what to do if false?
   let ilen = ilen0 div sizeof(uint32)
-  iq.setLen(ilen)
+  template iqoa: openArray[uint32] =
+    toOpenArray(iq, 0, ilen - 1)
   case stream.charset
-  of CHARSET_UTF_8: stream.encodeUTF8(iq, oq, ilen, olen, result)
-  of CHARSET_IBM866: stream.encodeSingleByte(iq, oq, ilen, olen, result, IBM866Encode)
-  of CHARSET_ISO_8859_2: stream.encodeSingleByte(iq, oq, ilen, olen, result, ISO88592Encode)
-  of CHARSET_ISO_8859_3: stream.encodeSingleByte(iq, oq, ilen, olen, result, ISO88593Encode)
-  of CHARSET_ISO_8859_4: stream.encodeSingleByte(iq, oq, ilen, olen, result, ISO88594Encode)
-  of CHARSET_ISO_8859_5: stream.encodeSingleByte(iq, oq, ilen, olen, result, ISO88595Encode)
-  of CHARSET_ISO_8859_6: stream.encodeSingleByte(iq, oq, ilen, olen, result, ISO88596Encode)
-  of CHARSET_ISO_8859_7: stream.encodeSingleByte(iq, oq, ilen, olen, result, ISO88597Encode)
-  of CHARSET_ISO_8859_8,
-     CHARSET_ISO_8859_8_I: stream.encodeSingleByte(iq, oq, ilen, olen, result, ISO88598Encode)
-  of CHARSET_ISO_8859_10: stream.encodeSingleByte(iq, oq, ilen, olen, result, ISO885910Encode)
-  of CHARSET_ISO_8859_13: stream.encodeSingleByte(iq, oq, ilen, olen, result, ISO885913Encode)
-  of CHARSET_ISO_8859_14: stream.encodeSingleByte(iq, oq, ilen, olen, result, ISO885914Encode)
-  of CHARSET_ISO_8859_15: stream.encodeSingleByte(iq, oq, ilen, olen, result, ISO885915Encode)
-  of CHARSET_ISO_8859_16: stream.encodeSingleByte(iq, oq, ilen, olen, result, ISO885916Encode)
-  of CHARSET_KOI8_R: stream.encodeSingleByte(iq, oq, ilen, olen, result, KOI8REncode)
-  of CHARSET_KOI8_U: stream.encodeSingleByte(iq, oq, ilen, olen, result, KOI8UEncode)
-  of CHARSET_MACINTOSH: stream.encodeSingleByte(iq, oq, ilen, olen, result, MacintoshEncode)
-  of CHARSET_WINDOWS_874: stream.encodeSingleByte(iq, oq, ilen, olen, result, Windows874Encode)
-  of CHARSET_WINDOWS_1250: stream.encodeSingleByte(iq, oq, ilen, olen, result, Windows1250Encode)
-  of CHARSET_WINDOWS_1251: stream.encodeSingleByte(iq, oq, ilen, olen, result, Windows1251Encode)
-  of CHARSET_WINDOWS_1252: stream.encodeSingleByte(iq, oq, ilen, olen, result, Windows1252Encode)
-  of CHARSET_WINDOWS_1253: stream.encodeSingleByte(iq, oq, ilen, olen, result, Windows1253Encode)
-  of CHARSET_WINDOWS_1254: stream.encodeSingleByte(iq, oq, ilen, olen, result, Windows1254Encode)
-  of CHARSET_WINDOWS_1255: stream.encodeSingleByte(iq, oq, ilen, olen, result, Windows1255Encode)
-  of CHARSET_WINDOWS_1256: stream.encodeSingleByte(iq, oq, ilen, olen, result, Windows1256Encode)
-  of CHARSET_WINDOWS_1257: stream.encodeSingleByte(iq, oq, ilen, olen, result, Windows1257Encode)
-  of CHARSET_WINDOWS_1258: stream.encodeSingleByte(iq, oq, ilen, olen, result, Windows1258Encode)
-  of CHARSET_X_MAC_CYRILLIC: stream.encodeSingleByte(iq, oq, ilen, olen, result, XMacCyrillicEncode)
-  of CHARSET_GBK: stream.encodeGb18030(iq, oq, ilen, olen, result, true)
-  of CHARSET_GB18030: stream.encodeGb18030(iq, oq, ilen, olen, result)
-  of CHARSET_BIG5: stream.encodeBig5(iq, oq, ilen, olen, result)
-  of CHARSET_EUC_JP: stream.encodeEUCJP(iq, oq, ilen, olen, result)
-  of CHARSET_ISO_2022_JP: stream.encodeISO2022JP(iq, oq, ilen, olen, result)
-  of CHARSET_SHIFT_JIS: stream.encodeShiftJIS(iq, oq, ilen, olen, result)
-  of CHARSET_EUC_KR: stream.encodeEUCKR(iq, oq, ilen, olen, result)
-  of CHARSET_X_USER_DEFINED: stream.encodeXUserDefined(iq, oq, ilen, olen, result)
-  of CHARSET_UNKNOWN: assert false, "Somebody forgot to set the character set here"
+  of CHARSET_UTF_8:
+    stream.encodeUTF8(iqoa, oq, olen, result)
+  of CHARSET_IBM866:
+    stream.encodeSingleByte(iqoa, oq, olen, result, IBM866Encode)
+  of CHARSET_ISO_8859_2:
+    stream.encodeSingleByte(iqoa, oq, olen, result, ISO88592Encode)
+  of CHARSET_ISO_8859_3:
+    stream.encodeSingleByte(iqoa, oq, olen, result, ISO88593Encode)
+  of CHARSET_ISO_8859_4:
+    stream.encodeSingleByte(iqoa, oq, olen, result, ISO88594Encode)
+  of CHARSET_ISO_8859_5:
+    stream.encodeSingleByte(iqoa, oq, olen, result, ISO88595Encode)
+  of CHARSET_ISO_8859_6:
+    stream.encodeSingleByte(iqoa, oq, olen, result, ISO88596Encode)
+  of CHARSET_ISO_8859_7:
+    stream.encodeSingleByte(iqoa, oq, olen, result, ISO88597Encode)
+  of CHARSET_ISO_8859_8, CHARSET_ISO_8859_8_I:
+    stream.encodeSingleByte(iqoa, oq, olen, result, ISO88598Encode)
+  of CHARSET_ISO_8859_10:
+    stream.encodeSingleByte(iqoa, oq, olen, result, ISO885910Encode)
+  of CHARSET_ISO_8859_13:
+    stream.encodeSingleByte(iqoa, oq, olen, result, ISO885913Encode)
+  of CHARSET_ISO_8859_14:
+    stream.encodeSingleByte(iqoa, oq, olen, result, ISO885914Encode)
+  of CHARSET_ISO_8859_15:
+    stream.encodeSingleByte(iqoa, oq, olen, result, ISO885915Encode)
+  of CHARSET_ISO_8859_16:
+    stream.encodeSingleByte(iqoa, oq, olen, result, ISO885916Encode)
+  of CHARSET_KOI8_R:
+    stream.encodeSingleByte(iqoa, oq, olen, result, KOI8REncode)
+  of CHARSET_KOI8_U:
+    stream.encodeSingleByte(iqoa, oq, olen, result, KOI8UEncode)
+  of CHARSET_MACINTOSH:
+    stream.encodeSingleByte(iqoa, oq, olen, result, MacintoshEncode)
+  of CHARSET_WINDOWS_874:
+    stream.encodeSingleByte(iqoa, oq, olen, result, Windows874Encode)
+  of CHARSET_WINDOWS_1250:
+    stream.encodeSingleByte(iqoa, oq, olen, result, Windows1250Encode)
+  of CHARSET_WINDOWS_1251:
+    stream.encodeSingleByte(iqoa, oq, olen, result, Windows1251Encode)
+  of CHARSET_WINDOWS_1252:
+    stream.encodeSingleByte(iqoa, oq, olen, result, Windows1252Encode)
+  of CHARSET_WINDOWS_1253:
+    stream.encodeSingleByte(iqoa, oq, olen, result, Windows1253Encode)
+  of CHARSET_WINDOWS_1254:
+    stream.encodeSingleByte(iqoa, oq, olen, result, Windows1254Encode)
+  of CHARSET_WINDOWS_1255:
+    stream.encodeSingleByte(iqoa, oq, olen, result, Windows1255Encode)
+  of CHARSET_WINDOWS_1256:
+    stream.encodeSingleByte(iqoa, oq, olen, result, Windows1256Encode)
+  of CHARSET_WINDOWS_1257:
+    stream.encodeSingleByte(iqoa, oq, olen, result, Windows1257Encode)
+  of CHARSET_WINDOWS_1258:
+    stream.encodeSingleByte(iqoa, oq, olen, result, Windows1258Encode)
+  of CHARSET_X_MAC_CYRILLIC:
+    stream.encodeSingleByte(iqoa, oq, olen, result, XMacCyrillicEncode)
+  of CHARSET_GBK:
+    stream.encodeGb18030(iqoa, oq, olen, result, true)
+  of CHARSET_GB18030:
+    stream.encodeGb18030(iqoa, oq, olen, result)
+  of CHARSET_BIG5:
+    stream.encodeBig5(iqoa, oq, olen, result)
+  of CHARSET_EUC_JP:
+    stream.encodeEUCJP(iqoa, oq, olen, result)
+  of CHARSET_ISO_2022_JP:
+    stream.encodeISO2022JP(iqoa, oq, olen, result)
+  of CHARSET_SHIFT_JIS:
+    stream.encodeShiftJIS(iqoa, oq, olen, result)
+  of CHARSET_EUC_KR:
+    stream.encodeEUCKR(iqoa, oq, olen, result)
+  of CHARSET_X_USER_DEFINED:
+    stream.encodeXUserDefined(iqoa, oq, olen, result)
+  of CHARSET_UNKNOWN:
+    doAssert false, "Somebody forgot to set the character set here"
   else: discard
   stream.checkEnd(oq, olen, result)
 
diff --git a/src/io/lineedit.nim b/src/io/lineedit.nim
index 66a4eede..ac10df06 100644
--- a/src/io/lineedit.nim
+++ b/src/io/lineedit.nim
@@ -1,10 +1,14 @@
-import unicode
-import strutils
 import sequtils
+import streams
+import strutils
+import unicode
 
 import bindings/quickjs
 import buffer/cell
+import data/charset
 import display/term
+import encoding/decoderstream
+import encoding/encoderstream
 import js/javascript
 import types/color
 import utils/opt
@@ -42,32 +46,13 @@ jsDestructor(LineEdit)
 func newLineHistory*(): LineHistory =
   return LineHistory()
 
-const colorFormat = (func(): Format =
-  result = newFormat()
-  result.fgcolor = cellColor(ANSI_BLUE)
-)()
-const defaultFormat = newFormat()
 proc printesc(edit: LineEdit, rs: seq[Rune]) =
-  var s = ""
-  var format = newFormat()
-  for r in rs:
-    if r.isControlChar():
-      s &= edit.term.processFormat(format, colorFormat)
-    else:
-      s &= edit.term.processFormat(format, defaultFormat)
-    s &= r
-  edit.term.write(s)
+  var dummy = 0
+  edit.term.write(edit.term.processOutputString0(rs.items, true, dummy))
 
-proc printesc(edit: LineEdit, s: string) =
-  var s = ""
-  var format = newFormat()
-  for r in s.runes:
-    if r.isControlChar():
-      s &= edit.term.processFormat(format, colorFormat)
-    else:
-      s &= edit.term.processFormat(format, defaultFormat)
-    s &= r
-  edit.term.write(s)
+proc print(edit: LineEdit, s: string) =
+  var dummy = 0
+  edit.term.write(edit.term.processOutputString(s, dummy))
 
 template kill0(edit: LineEdit, i: int) =
   edit.space(i)
@@ -126,7 +111,7 @@ proc redraw(state: LineEdit) =
   state.begin0()
   let os = state.news.substr(state.shift, state.shift + state.displen)
   if state.hide:
-    state.printesc('*'.repeat(os.width()))
+    state.print('*'.repeat(os.width()))
   else:
     state.printesc(os)
   state.space(max(state.maxwidth - state.minlen - os.width(), 0))
@@ -172,7 +157,7 @@ proc insertCharseq(edit: LineEdit, cs: var seq[Rune]) =
     edit.news &= cs
     edit.cursor += cs.len
     if edit.hide:
-      edit.printesc('*'.repeat(cs.width()))
+      edit.print('*'.repeat(cs.width()))
     else:
       edit.printesc(cs)
   else:
@@ -200,11 +185,25 @@ proc backspace(edit: LineEdit) {.jsfunc.} =
     else:
       edit.fullRedraw()
 
-proc write*(edit: LineEdit, s: string): bool {.jsfunc.} =
-  if validateUtf8(s) == -1:
-    var cs = s.toRunes()
-    edit.insertCharseq(cs)
-    return true
+# Number of uint32 slots in the decode buffer below; also passed to
+# newDecoderStream as its `buflen` argument.
+const buflen = 128
+# Per-thread scratch buffer that receives decoded values from the
+# DecoderStream in write().
+var buf {.threadVar.}: array[buflen, uint32]
+proc write*(edit: LineEdit, s: string, cs: Charset): bool =
+  ## Decodes `s` from charset `cs` and inserts the resulting runes into the
+  ## line through `insertCharseq`.
+  ## Returns false, without inserting anything, if the decoder reports
+  ## failure; returns true otherwise.
+  let ss = newStringStream(s)
+  let ds = newDecoderStream(ss, cs = cs, buflen = buflen,
+    errormode = DECODER_ERROR_MODE_FATAL)
+  var cseq: seq[Rune]
+  while not ds.atEnd:
+    # readData returns a byte count, hence the division by 4 to get the
+    # number of uint32 values that were filled in.
+    let n = ds.readData(buf)
+    for i in 0 ..< n div 4:
+      let r = cast[Rune](buf[i])
+      cseq.add(r)
+  if ds.failed:
+    return false
+  edit.insertCharseq(cseq)
+  return true
+
+proc write(edit: LineEdit, s: string): bool {.jsfunc.} =
+  ## Overload exposed through the jsfunc pragma; decodes `s` as UTF-8.
+  edit.write(s, CHARSET_UTF_8)
 
 proc delete(edit: LineEdit) {.jsfunc.} =
   if edit.cursor >= 0 and edit.cursor < edit.news.len:
diff --git a/src/io/runestream.nim b/src/io/runestream.nim
new file mode 100644
index 00000000..6facda91
--- /dev/null
+++ b/src/io/runestream.nim
@@ -0,0 +1,31 @@
+import streams
+
+## In-memory Stream whose data is a sequence of uint32 values.
+type RuneStream* = ref object of Stream
+  at: int # index of the next unread element of source, counted in uint32
+          # units (the corresponding byte offset is at * 4)
+  source: seq[uint32]
+
+proc runeClose(s: Stream) =
+  ## closeImpl of RuneStream: empties the source sequence.
+  ## The read index `at` is left unchanged.
+  let s = cast[RuneStream](s)
+  s.source.setLen(0)
+
+proc runeReadData(s: Stream, buffer: pointer, bufLen: int): int =
+  ## readDataImpl of RuneStream: copies as many whole uint32 units as fit
+  ## into `buffer` and advances the read index past them.
+  ## `bufLen` is the capacity of `buffer` in bytes.
+  ## Returns the number of bytes written, or 0 if nothing was copied.
+  let s = cast[RuneStream](s)
+  # bufLen is a byte count, while s.at and s.source.len count uint32 units.
+  # Convert before clamping, otherwise copyMem could write up to four times
+  # the size of the caller's buffer.
+  let L = min(bufLen div sizeof(uint32), s.source.len - s.at)
+  if L <= 0:
+    # No data left, or the buffer cannot hold even a single unit.
+    return
+  copyMem(buffer, addr s.source[s.at], L * sizeof(uint32))
+  s.at += L
+  assert s.at <= s.source.len
+  return L * sizeof(uint32)
+
+proc runeAtEnd(s: Stream): bool =
+  ## atEndImpl of RuneStream: true once the read index equals the length of
+  ## the source sequence.
+  let s = cast[RuneStream](s)
+  return s.at == s.source.len
+
+proc newRuneStream*(source: openarray[uint32]): RuneStream =
+  ## Creates a RuneStream that reads from a copy of `source` (`@source`
+  ## builds a new seq) and installs the close/readData/atEnd callbacks
+  ## defined in this module.
+  return RuneStream(
+    source: @source,
+    closeImpl: runeClose,
+    readDataImpl: runeReadData,
+    atEndImpl: runeAtEnd
+  )