about summary refs log tree commit diff stats
path: root/src
diff options
context:
space:
mode:
Diffstat (limited to 'src')
-rw-r--r-- src/config/config.nim 2
-rw-r--r-- src/config/mailcap.nim 2
-rw-r--r-- src/display/lineedit.nim 18
-rw-r--r-- src/display/term.nim 16
-rw-r--r-- src/html/chadombuilder.nim 129
-rw-r--r-- src/html/dom.nim 25
-rw-r--r-- src/io/posixstream.nim 8
-rw-r--r-- src/js/encoding.nim 197
-rw-r--r-- src/js/jstypes.nim 3
-rw-r--r-- src/loader/loader.nim 2
-rw-r--r-- src/loader/response.nim 18
-rw-r--r-- src/local/client.nim 2
-rw-r--r-- src/local/container.nim 2
-rw-r--r-- src/local/pager.nim 2
-rw-r--r-- src/main.nim 2
-rw-r--r-- src/render/rendertext.nim 79
-rw-r--r-- src/server/buffer.nim 208
-rw-r--r-- src/version.nim 4
18 files changed, 378 insertions, 341 deletions
diff --git a/src/config/config.nim b/src/config/config.nim
index 7be0c4a2..79ef263b 100644
--- a/src/config/config.nim
+++ b/src/config/config.nim
@@ -24,7 +24,7 @@ import types/url
 import utils/mimeguess
 import utils/twtstr
 
-import chakasu/charset
+import chagashi/charset
 
 type
   ColorMode* = enum
diff --git a/src/config/mailcap.nim b/src/config/mailcap.nim
index 0caf7f17..d5d17eae 100644
--- a/src/config/mailcap.nim
+++ b/src/config/mailcap.nim
@@ -8,7 +8,7 @@ import types/url
 import types/opt
 import utils/twtstr
 
-import chakasu/charset
+import chagashi/charset
 
 type
   MailcapParser = object
diff --git a/src/display/lineedit.nim b/src/display/lineedit.nim
index 7a5067eb..6969171e 100644
--- a/src/display/lineedit.nim
+++ b/src/display/lineedit.nim
@@ -1,4 +1,3 @@
-import std/streams
 import std/strutils
 import std/unicode
 
@@ -10,9 +9,9 @@ import types/opt
 import utils/strwidth
 import utils/twtstr
 
-import chakasu/charset
-import chakasu/decoderstream
-import chakasu/encoderstream
+import chagashi/charset
+import chagashi/validator
+import chagashi/decoder
 
 type
   LineEditState* = enum
@@ -155,15 +154,14 @@ proc backspace(edit: LineEdit) {.jsfunc.} =
 
 proc write*(edit: LineEdit, s: string, cs: Charset): bool =
   if cs == CHARSET_UTF_8:
-    if s.validateUtf8() != -1:
+    if s.validateUTF8Surr() != -1:
       return false
     edit.insertCharseq(s)
   else:
-    let ss = newStringStream(s)
-    let ds = newDecoderStream(ss, cs, errormode = DECODER_ERROR_MODE_FATAL)
-    let es = newEncoderStream(ds, CHARSET_UTF_8)
-    let s = es.readAll()
-    if ds.failed or es.failed:
+    let td = newTextDecoder(cs)
+    var success = false
+    let s = td.decodeAll(s, success)
+    if not success:
       return false
     edit.insertCharseq(s)
   return true
diff --git a/src/display/term.nim b/src/display/term.nim
index fd6271f3..afe84db6 100644
--- a/src/display/term.nim
+++ b/src/display/term.nim
@@ -16,9 +16,9 @@ import types/opt
 import utils/strwidth
 import utils/twtstr
 
-import chakasu/charset
-import chakasu/decoderstream
-import chakasu/encoderstream
+import chagashi/charset
+import chagashi/encoder
+import chagashi/validator
 
 export isatty
 
@@ -384,7 +384,7 @@ proc setTitle*(term: Terminal, title: string) =
     term.outfile.write(XTERM_TITLE(title))
 
 proc processOutputString*(term: Terminal, str: string, w: var int): string =
-  if str.validateUtf8() != -1:
+  if str.validateUTF8Surr() != -1:
     return "?"
   # twidth wouldn't work here, the view may start at the nth character.
   # pager must ensure tabs are converted beforehand.
@@ -397,11 +397,9 @@ proc processOutputString*(term: Terminal, str: string, w: var int): string =
     # The output encoding matches the internal representation.
     return str
   else:
-    # Output is not utf-8, so we must convert back to utf-32 and then encode.
-    let ss = newStringStream(str)
-    let ds = newDecoderStream(ss)
-    let es = newEncoderStream(ds, term.cs, errormode = ENCODER_ERROR_MODE_FATAL)
-    return es.readAll()
+    # Output is not utf-8, so we must encode it first.
+    var success = false
+    return newTextEncoder(term.cs).encodeAll(str, success)
 
 proc generateFullOutput(term: Terminal, grid: FixedGrid): string =
   var format = Format()
diff --git a/src/html/chadombuilder.nim b/src/html/chadombuilder.nim
index 221cf7f8..66e3a518 100644
--- a/src/html/chadombuilder.nim
+++ b/src/html/chadombuilder.nim
@@ -1,6 +1,5 @@
 import std/deques
 import std/options
-import std/streams
 
 import html/catom
 import html/dom
@@ -10,37 +9,30 @@ import js/fromjs
 import js/javascript
 import types/url
 
-import chakasu/charset
-import chakasu/decoderstream
-import chakasu/encoderstream
+import chagashi/charset
 
 import chame/htmlparser
 import chame/tags
 
+export htmlparser.ParseResult
+
 # DOMBuilder implementation for Chawan.
 
-type CharsetConfidence = enum
+type CharsetConfidence* = enum
   ccTentative, ccCertain, ccIrrelevant
 
 type
   HTML5ParserWrapper* = ref object
     parser: HTML5Parser[Node, CAtom]
-    charsetStack: seq[Charset]
-    seekable: bool
     builder*: ChaDOMBuilder
     opts: HTML5ParserOpts[Node, CAtom]
-    stream: StringStream
-    encoder: EncoderStream
-    decoder: DecoderStream
     # hack so we don't have to worry about leaks or the GC deallocating parser
     refs: seq[Document]
     stoppedFromScript: bool
-    needsBOMSniff: bool
-    wasICE: bool # inhibitCheckEnd
 
   ChaDOMBuilder = ref object of DOMBuilder[Node, CAtom]
-    charset: Charset
-    confidence: CharsetConfidence
+    charset*: Charset
+    confidence*: CharsetConfidence
     document*: Document
     factory: CAtomFactory
     poppedScript: HTMLScriptElement
@@ -80,7 +72,8 @@ proc finish(builder: ChaDOMBuilder) =
     script.execute()
   #TODO events
 
-proc restart(builder: ChaDOMBuilder, wrapper: HTML5ParserWrapper) =
+proc restart*(wrapper: HTML5ParserWrapper, charset: Charset) =
+  let builder = wrapper.builder
   let document = newDocument(builder.factory)
   document.setActiveParser(wrapper)
   wrapper.refs.add(document)
@@ -92,7 +85,9 @@ proc restart(builder: ChaDOMBuilder, wrapper: HTML5ParserWrapper) =
     document.window = window
     window.document = document
   builder.document = document
+  builder.charset = charset
   assert document.factory != nil
+  wrapper.parser = initHTML5Parser(builder, wrapper.opts)
 
 proc setQuirksModeImpl(builder: ChaDOMBuilder, quirksMode: QuirksMode) =
   if not builder.document.parser_cannot_change_the_mode_flag:
@@ -214,7 +209,7 @@ proc elementPoppedImpl(builder: ChaDOMBuilder, element: Node) =
     builder.poppedScript = HTMLScriptElement(element)
 
 proc newChaDOMBuilder(url: URL, window: Window, factory: CAtomFactory,
-    confidence: CharsetConfidence): ChaDOMBuilder =
+    confidence: CharsetConfidence, charset = DefaultCharset): ChaDOMBuilder =
   let document = newDocument(factory)
   document.contentType = "text/html"
   document.url = url
@@ -224,7 +219,8 @@ proc newChaDOMBuilder(url: URL, window: Window, factory: CAtomFactory,
   return ChaDOMBuilder(
     document: document,
     factory: factory,
-    confidence: confidence
+    confidence: confidence,
+    charset: charset
   )
 
 # https://html.spec.whatwg.org/multipage/parsing.html#parsing-html-fragments
@@ -264,68 +260,22 @@ proc parseHTMLFragment*(element: Element, s: string): seq[Node] =
   builder.finish()
   return root.childList
 
-#TODO this should be handled by decoderstream or buffer
-proc bomSniff(wrapper: HTML5ParserWrapper): Charset =
-  let stream = wrapper.stream
-  let op = stream.getPosition()
-  if op + 2 >= stream.data.len:
-    return CHARSET_UNKNOWN
-  let bom = stream.readStr(2)
-  if bom == "\xFE\xFF":
-    return CHARSET_UTF_16_BE
-  if bom == "\xFF\xFE":
-    return CHARSET_UTF_16_LE
-  if bom == "\xEF\xBB":
-    if op + 3 < stream.data.len and stream.readChar() == '\xBF':
-      return CHARSET_UTF_8
-  wrapper.stream.setPosition(op)
-  return CHARSET_UNKNOWN
-
-proc switchCharset(wrapper: HTML5ParserWrapper) =
-  let builder = wrapper.builder
-  builder.charset = wrapper.charsetStack.pop()
-  if wrapper.seekable:
-    builder.confidence = ccTentative # used in the next iteration
-  else:
-    builder.confidence = ccCertain
-  let em = if wrapper.charsetStack.len == 0 or not wrapper.seekable:
-    DECODER_ERROR_MODE_REPLACEMENT
-  else:
-    DECODER_ERROR_MODE_FATAL
-  let ice = wrapper.decoder == nil or wrapper.wasICE
-  wrapper.parser = initHTML5Parser(builder, wrapper.opts)
-  wrapper.decoder = newDecoderStream(wrapper.stream, builder.charset,
-    errormode = em)
-  wrapper.decoder.setInhibitCheckEnd(ice)
-  wrapper.wasICE = ice
-  wrapper.encoder = newEncoderStream(wrapper.decoder, CHARSET_UTF_8,
-    errormode = ENCODER_ERROR_MODE_FATAL)
-
-proc newHTML5ParserWrapper*(stream: StringStream, window: Window, url: URL,
-    factory: CAtomFactory, charsets: seq[Charset], seekable: bool):
-    HTML5ParserWrapper =
+proc newHTML5ParserWrapper*(window: Window, url: URL, factory: CAtomFactory,
+    charset: Charset): HTML5ParserWrapper =
   let opts = HTML5ParserOpts[Node, CAtom](
     isIframeSrcdoc: false, #TODO?
     scripting: window != nil and window.settings.scripting
   )
-  let builder = newChaDOMBuilder(url, window, factory, ccTentative)
+  let builder = newChaDOMBuilder(url, window, factory, ccTentative, charset)
   let wrapper = HTML5ParserWrapper(
-    seekable: seekable,
     builder: builder,
     opts: opts,
-    stream: stream,
-    needsBOMSniff: seekable
+    parser: initHTML5Parser(builder, opts)
   )
   builder.document.setActiveParser(wrapper)
-  if charsets.len == 0:
-    wrapper.charsetStack = @[DefaultCharset] # UTF-8
-  else:
-    for i in countdown(charsets.high, 0):
-      wrapper.charsetStack.add(charsets[i])
-  wrapper.switchCharset()
   return wrapper
 
-proc parseBuffer(wrapper: HTML5ParserWrapper, buffer: openArray[char]):
+proc parseBuffer*(wrapper: HTML5ParserWrapper, buffer: openArray[char]):
     ParseResult =
   let builder = wrapper.builder
   let document = builder.document
@@ -390,50 +340,7 @@ proc CDB_parseDocumentWriteChunk(wrapper: pointer) {.exportc.} =
   if res == PRES_STOP:
     wrapper.stoppedFromScript = true
 
-proc parseAll*(wrapper: HTML5ParserWrapper): bool =
-  let builder = wrapper.builder
-  if wrapper.needsBOMSniff:
-    if wrapper.stream.getPosition() + 3 >= wrapper.stream.data.len:
-      return true
-    let scs = wrapper.bomSniff()
-    if scs != CHARSET_UNKNOWN:
-      builder.confidence = ccCertain
-      wrapper.charsetStack = @[scs]
-      wrapper.seekable = false
-      wrapper.switchCharset()
-    wrapper.needsBOMSniff = false
-  let buffer = wrapper.encoder.readAll()
-  if wrapper.decoder.failed:
-    assert wrapper.seekable
-    # Retry with another charset.
-    builder.restart(wrapper)
-    wrapper.switchCharset()
-    return false
-  if buffer.len == 0:
-    return true
-  let res = wrapper.parseBuffer(buffer)
-  if res == PRES_STOP:
-    # A meta tag describing the charset has been found; force use of this
-    # charset.
-    builder.restart(wrapper)
-    wrapper.charsetStack.add(builder.charset)
-    wrapper.seekable = false
-    wrapper.switchCharset()
-    return false
-  return true
-
 proc finish*(wrapper: HTML5ParserWrapper) =
-  if wrapper.needsBOMSniff:
-    let scs = wrapper.bomSniff()
-    if scs != CHARSET_UNKNOWN:
-      wrapper.builder.confidence = ccCertain
-      wrapper.charsetStack = @[scs]
-      wrapper.seekable = false
-      wrapper.switchCharset()
-    wrapper.needsBOMSniff = false
-  wrapper.decoder.setInhibitCheckEnd(false)
-  wrapper.wasICE = false
-  doAssert wrapper.parseAll()
   wrapper.parser.finish()
   wrapper.builder.finish()
   for r in wrapper.refs:
diff --git a/src/html/dom.nim b/src/html/dom.nim
index d5b21622..e5d871ba 100644
--- a/src/html/dom.nim
+++ b/src/html/dom.nim
@@ -41,9 +41,9 @@ import utils/mimeguess
 import utils/strwidth
 import utils/twtstr
 
-import chakasu/charset
-import chakasu/decoderstream
-import chakasu/encoderstream
+import chagashi/charset
+import chagashi/decoder
+import chagashi/validator
 
 import chame/tags
 
@@ -2744,13 +2744,8 @@ proc loadResource(window: Window, link: HTMLLinkElement) =
         res.unregisterFun()
     ).then(proc(s: JSResult[string]) =
       if s.isOk:
-        #TODO this is extremely inefficient, and text() should return
-        # utf8 anyways
-        let ss = newStringStream(s.get)
-        #TODO non-utf-8 css
-        let ds = newDecoderStream(ss, cs = CHARSET_UTF_8)
-        let source = newEncoderStream(ds, cs = CHARSET_UTF_8)
-        link.sheet = parseStylesheet(source, window.factory)
+        #TODO non-utf-8 css?
+        link.sheet = parseStylesheet(newStringStream(s.get), window.factory)
         window.document.cachedSheetsInvalid = true
     )
     window.loadingResourcePromises.add(p)
@@ -3441,12 +3436,12 @@ proc fetchClassicScript(element: HTMLScriptElement, url: URL,
   if response.res != 0:
     element.onComplete(ScriptResult(t: RESULT_NULL))
     return
-  let cs = if cs == CHARSET_UNKNOWN:
-    CHARSET_UTF_8
+  #TODO make this non-blocking somehow
+  let s = response.body.readAll()
+  let source = if cs in {CHARSET_UNKNOWN, CHARSET_UTF_8}:
+    s.toValidUTF8()
   else:
-    cs
-  let decoder = newDecoderStream(response.body, cs = cs)
-  let source = newEncoderStream(decoder).readAll()
+    newTextDecoder(cs).decodeAll(s)
   let script = window.jsctx.createClassicScript(source, url, options, false)
   element.onComplete(ScriptResult(t: RESULT_SCRIPT, script: script))
 
diff --git a/src/io/posixstream.nim b/src/io/posixstream.nim
index 683d72b8..c9c1c234 100644
--- a/src/io/posixstream.nim
+++ b/src/io/posixstream.nim
@@ -74,7 +74,13 @@ proc psClose(s: Stream) =
 proc psReadData(s: Stream, buffer: pointer, len: int): int =
   let s = PosixStream(s)
   assert len != 0 and s.blocking
-  return s.recvData(buffer, len)
+  result = 0
+  while result < len:
+    let p = addr cast[ptr UncheckedArray[uint8]](buffer)[result]
+    let n = s.recvData(p, len - result)
+    if n == 0:
+      break
+    result += n
 
 proc psWriteData(s: Stream, buffer: pointer, len: int) =
   let s = PosixStream(s)
diff --git a/src/js/encoding.nim b/src/js/encoding.nim
index 0e6643fb..b2ef1d1b 100644
--- a/src/js/encoding.nim
+++ b/src/js/encoding.nim
@@ -1,98 +1,189 @@
 import std/streams
 
+import bindings/quickjs
 import js/error
 import js/javascript
 import js/jstypes
 
-import chakasu/charset
-import chakasu/decoderstream
-import chakasu/encoderstream
+import chagashi/charset
+import chagashi/decoder
+import chagashi/decodercore
+import chagashi/validator
+import chagashi/validatorcore
 
 type
-  TextEncoder = ref object
+  JSTextEncoder = ref object
 
-  TextDecoder = ref object
+  JSTextDecoder = ref object
     encoding: Charset
-    errorMode: DecoderErrorMode
     ignoreBOM {.jsget.}: bool
+    fatal {.jsget.}: bool
     doNotFlush: bool
     bomSeen: bool
-    decoder: DecoderStream
-    encoder: EncoderStream # to return the string to JS
-    istream: StringStream
+    td: TextDecoder
+    tv: ref TextValidatorUTF8
+    validateBuf: seq[uint8]
 
-jsDestructor(TextDecoder)
-jsDestructor(TextEncoder)
+jsDestructor(JSTextDecoder)
+jsDestructor(JSTextEncoder)
 
 type TextDecoderOptions = object of JSDict
   fatal: bool
   ignoreBOM: bool
 
-func newTextDecoder(label = "utf-8", options = TextDecoderOptions()):
-    JSResult[TextDecoder] {.jsctor.} =
-  let errorMode = if options.fatal:
-    DECODER_ERROR_MODE_FATAL
-  else:
-    DECODER_ERROR_MODE_REPLACEMENT
+func newJSTextDecoder(label = "utf-8", options = TextDecoderOptions()):
+    JSResult[JSTextDecoder] {.jsctor.} =
   let encoding = getCharset(label)
   if encoding in {CHARSET_UNKNOWN, CHARSET_REPLACEMENT}:
     return err(newRangeError("Invalid encoding label"))
-  return ok(TextDecoder(
-    errorMode: errorMode,
+  return ok(JSTextDecoder(
     ignoreBOM: options.ignoreBOM,
+    fatal: options.fatal,
+    td: if encoding != CHARSET_UTF_8: newTextDecoder(encoding) else: nil,
+    tv: if encoding == CHARSET_UTF_8: (ref TextValidatorUTF8)() else: nil,
     encoding: encoding
   ))
 
+type Growbuf = object # manually managed, growable byte buffer
+  p: ptr UncheckedArray[uint8] # raw storage; nil while nothing is allocated
+  cap: int # allocated size of p
+  len: int # number of bytes written so far
+
+{.warning[Deprecated]: off.}:
+  proc `=destroy`(growbuf: var Growbuf) = # release the raw storage, if any
+    if growbuf.p != nil:
+      dealloc(growbuf.p)
+      growbuf.p = nil # cleared so a repeated destroy does not free twice
+
+const BufferSize = 128 # initial Growbuf capacity
+proc grow(buf: var Growbuf) = # enlarge buf's storage by one step
+  if buf.cap == 0: # nothing allocated yet: start at BufferSize
+    buf.cap = BufferSize
+  else: # otherwise double the current capacity
+    buf.cap *= 2
+  buf.p = cast[ptr UncheckedArray[uint8]](buf.p.realloc(buf.cap))
+
+proc write(buf: var Growbuf, s: openArray[uint8]) = # append the bytes of s to buf
+  while buf.len + s.len > buf.cap: # one grow step may not be enough for a large s
+    buf.grow()
+  if s.len > 0: # s[0] does not exist for empty input
+    copyMem(addr buf.p[buf.len], unsafeAddr s[0], s.len)
+  buf.len += s.len
+
+proc write(buf: var Growbuf, s: string) = # append the bytes of s to buf
+  while buf.len + s.len > buf.cap: # one grow step may not be enough for a large s
+    buf.grow()
+  if s.len > 0: # s[0] does not exist for an empty string
+    copyMem(addr buf.p[buf.len], unsafeAddr s[0], s.len)
+  buf.len += s.len
+
+proc decode0(this: JSTextDecoder, ctx: JSContext, input: JSArrayBufferView,
+    stream: bool): JSResult[JSValue] = # decode input through this.td into a new JS string
+  var oq = Growbuf(
+    p: cast[ptr UncheckedArray[uint8]](alloc(BufferSize)),
+    len: 0,
+    cap: BufferSize
+  )
+  var i = 0 # first input index handed to the decoder
+  let H = int(input.abuf.len) - 1 # last input index; -1 when the buffer is empty
+  template handle_error = # fatal mode: TypeError; otherwise emit U+FFFD and continue
+    if this.fatal:
+      return errTypeError("Failed to decode string")
+    oq.write("\uFFFD")
+    i = this.td.i # presumably the decoder's input position after the error -- TODO confirm
+  while true:
+    case this.td.decode(input.abuf.p.toOpenArray(i, H),
+      oq.p.toOpenArray(0, oq.cap - 1), oq.len)
+    of tdrDone:
+      if not stream: # non-streaming call: also finish the decoder, which can report an error
+        case this.td.finish()
+        of tdfrDone: discard
+        of tdfrError: handle_error
+      break
+    of tdrError:
+      handle_error
+    of tdrReqOutput: # enlarge the output buffer, then call decode again
+      oq.grow()
+  return ok(JS_NewStringLen(ctx, cast[cstring](oq.p), csize_t(oq.len)))
+
+proc validate0(this: JSTextDecoder, ctx: JSContext, input: JSArrayBufferView,
+    stream: bool): JSResult[JSValue] = # UTF-8 path: validate input with this.tv, return a JS string
+  # assume input is valid; do not allocate yet
+  var oq = Growbuf(p: nil, len: 0, cap: 0)
+  var i = 0 # start of the input span not yet copied to oq
+  let H = int(input.abuf.len) - 1 # last input index; -1 when the buffer is empty
+  var n = 0 # set by validate; used below as the inclusive end of the span to keep -- TODO confirm
+  template handle_error = # fatal mode: TypeError; otherwise copy input[i..n] and append U+FFFD
+    if this.fatal:
+      return errTypeError("Failed to decode string")
+    # write from previous error (or beginning) to the last valid char
+    oq.write(input.abuf.p.toOpenArray(i, n))
+    oq.write("\uFFFD")
+    this.validateBuf.setLen(0)
+    i = this.tv.i # presumably the validator's input position after the error -- TODO confirm
+  while true:
+    case this.tv[].validate(input.abuf.p.toOpenArray(i, H), n)
+    of tvrDone:
+      break
+    of tvrError:
+      handle_error
+  if not stream: # non-streaming call: also finish the validator, which can report an error
+    case this.tv[].finish()
+    of tvrDone: discard
+    of tvrError: handle_error
+  if this.validateBuf.len > 0 and n > -1: # bytes held back by an earlier call go out first
+    oq.write(this.validateBuf)
+    oq.write(input.abuf.p.toOpenArray(i, n))
+    this.validateBuf.setLen(0)
+  this.validateBuf.add(input.abuf.p.toOpenArray(n + 1, input.abuf.high)) # keep the bytes after n for a later call
+  if oq.len > 0:
+    assert oq.p != nil
+    return ok(JS_NewStringLen(ctx, cast[cstring](oq.p), csize_t(oq.len)))
+  assert oq.p == nil
+  return ok(JS_NewStringLen(ctx, cast[cstring](input.abuf.p), csize_t(n + 1))) # nothing was copied: build the string from the input bytes directly
+
 type TextDecodeOptions = object of JSDict
   stream: bool
 
 #TODO AllowSharedBufferSource
-proc decode(this: TextDecoder, input = none(JSArrayBufferView),
-    options = TextDecodeOptions()): string {.jsfunc.} =
+proc decode(ctx: JSContext, this: JSTextDecoder, input = opt(JSArrayBufferView),
+    options = TextDecodeOptions()): JSResult[JSValue] {.jsfunc.} =
   if not this.doNotFlush:
-    if this.istream != nil:
-      this.istream.close()
-    if this.decoder != nil:
-      this.decoder.close()
-    if this.encoder != nil:
-      this.encoder.close()
-    this.istream = newStringStream()
-    this.decoder = newDecoderStream(this.istream, cs = this.encoding,
-      errormode = this.errorMode)
-    this.encoder = newEncoderStream(this.decoder, cs = CHARSET_UTF_8)
+    if this.td != nil:
+      this.td = newTextDecoder(this.encoding)
+    else:
+      assert this.tv != nil
+      this.tv = (ref TextValidatorUTF8)()
     this.bomSeen = false
   if this.doNotFlush != options.stream:
     this.doNotFlush = options.stream
-    this.decoder.setInhibitCheckEnd(options.stream)
   if input.isSome:
-    let input = input.get
-    let pos = this.istream.getPosition()
-    #TODO input offset?
-    this.istream.writeData(input.abuf.p, int(input.abuf.len))
-    this.istream.setPosition(pos)
-  #TODO this should return a JSString, so we do not needlessly re-encode
-  # the output. (Right now we do, implicitly through toJS.)
-  return this.encoder.readAll()
-
-func jencoding(this: TextDecoder): string {.jsfget: "encoding".} =
+    if this.td != nil:
+      return this.decode0(ctx, input.get, options.stream)
+    else:
+      assert this.encoding == CHARSET_UTF_8
+      # just validate
+      return this.validate0(ctx, input.get, options.stream)
+  return ok(JS_NewString(ctx, ""))
+
+func jencoding(this: JSTextDecoder): string {.jsfget: "encoding".} =
   return $this.encoding
 
-func fatal(this: TextDecoder): bool {.jsfget.} =
-  return this.errorMode == DECODER_ERROR_MODE_FATAL
-
-func newTextEncoder(): TextEncoder {.jsctor.} =
-  return TextEncoder()
+func newTextEncoder(): JSTextEncoder {.jsctor.} =
+  return JSTextEncoder()
 
-func jencoding(this: TextEncoder): string {.jsfget: "encoding".} =
+func jencoding(this: JSTextEncoder): string {.jsfget: "encoding".} =
   return "utf-8"
 
 proc dealloc_wrap(rt: JSRuntime, opaque, p: pointer) {.cdecl.} =
   dealloc(p)
 
-proc encode(this: TextEncoder, input = ""): JSUint8Array {.jsfunc.} =
-  # input is already UTF-8 here :P
+proc encode(this: JSTextEncoder, input = ""): JSUint8Array {.jsfunc.} =
+  # we have to validate input first :/
+  #TODO it is possible to do less copies here...
+  var input = input.toValidUTF8()
   let buf = cast[ptr UncheckedArray[uint8]](alloc(input.len))
-  copyMem(buf, unsafeAddr input[0], input.len)
+  copyMem(buf, addr input[0], input.len)
   let abuf = JSArrayBuffer(
     p: buf,
     len: csize_t(input.len),
@@ -107,5 +198,5 @@ proc encode(this: TextEncoder, input = ""): JSUint8Array {.jsfunc.} =
 #TODO encodeInto
 
 proc addEncodingModule*(ctx: JSContext) =
-  ctx.registerType(TextDecoder)
-  ctx.registerType(TextEncoder)
+  ctx.registerType(JSTextDecoder, name = "TextDecoder")
+  ctx.registerType(JSTextEncoder, name = "TextEncoder")
diff --git a/src/js/jstypes.nim b/src/js/jstypes.nim
index b8ef6c55..5336f067 100644
--- a/src/js/jstypes.nim
+++ b/src/js/jstypes.nim
@@ -30,3 +30,6 @@ type
     abuf*: JSArrayBuffer
     offset*: csize_t # offset into the buffer
     nmemb*: csize_t # number of members
+
+func high*(abuf: JSArrayBuffer): int = # index of the last byte; -1 when abuf.len is 0
+  return int(abuf.len) - 1
diff --git a/src/loader/loader.nim b/src/loader/loader.nim
index d859ae34..3c974c95 100644
--- a/src/loader/loader.nim
+++ b/src/loader/loader.nim
@@ -51,7 +51,7 @@ import types/url
 import utils/mimeguess
 import utils/twtstr
 
-import chakasu/charset
+import chagashi/charset
 
 export request
 export response
diff --git a/src/loader/response.nim b/src/loader/response.nim
index 0869a73e..6168aff6 100644
--- a/src/loader/response.nim
+++ b/src/loader/response.nim
@@ -1,5 +1,4 @@
 import std/streams
-import std/unicode
 
 import bindings/quickjs
 import io/promise
@@ -11,9 +10,9 @@ import loader/request
 import types/blob
 import types/url
 
-import chakasu/charset
-import chakasu/decoderstream
-import chakasu/encoderstream
+import chagashi/charset
+import chagashi/decoder
+import chagashi/validator
 
 type
   ResponseType* = enum
@@ -106,13 +105,12 @@ proc text*(response: Response): Promise[JSResult[string]] {.jsfunc.} =
       CHARSET_UTF_8
     else:
       response.charset
-    if cs == CHARSET_UTF_8 and s.validateUtf8() == -1:
-      ok(s)
+    #TODO this is inefficient
+    # maybe add a JS type that turns a seq[char] into JS strings
+    if cs in {CHARSET_UTF_8, CHARSET_UNKNOWN}:
+      ok(s.toValidUTF8())
     else:
-      let ss = newStringStream(s)
-      let ds = newDecoderStream(ss, cs)
-      let es = newEncoderStream(ds, CHARSET_UTF_8)
-      return ok(es.readAll())
+      ok(newTextDecoder(cs).decodeAll(s))
   )
 
 proc blob*(response: Response): Promise[JSResult[Blob]] {.jsfunc.} =
diff --git a/src/local/client.nim b/src/local/client.nim
index 291e1b0d..e0a453db 100644
--- a/src/local/client.nim
+++ b/src/local/client.nim
@@ -51,7 +51,7 @@ import utils/twtstr
 import xhr/formdata
 import xhr/xmlhttprequest
 
-import chakasu/charset
+import chagashi/charset
 
 type
   Client* = ref object
diff --git a/src/local/container.nim b/src/local/container.nim
index 3814f43d..e61d4fbf 100644
--- a/src/local/container.nim
+++ b/src/local/container.nim
@@ -29,7 +29,7 @@ import utils/mimeguess
 import utils/strwidth
 import utils/twtstr
 
-import chakasu/charset
+import chagashi/charset
 
 type
   CursorPosition* = object
diff --git a/src/local/pager.nim b/src/local/pager.nim
index a5b63523..64a11964 100644
--- a/src/local/pager.nim
+++ b/src/local/pager.nim
@@ -42,7 +42,7 @@ import types/url
 import utils/strwidth
 import utils/twtstr
 
-import chakasu/charset
+import chagashi/charset
 
 type
   LineMode* = enum
diff --git a/src/main.nim b/src/main.nim
index d6a9abce..9d782a90 100644
--- a/src/main.nim
+++ b/src/main.nim
@@ -15,7 +15,7 @@ import types/opt
 import utils/strwidth
 import utils/twtstr
 
-import chakasu/charset
+import chagashi/charset
 
 proc main() =
   let params = commandLineParams()
diff --git a/src/render/rendertext.nim b/src/render/rendertext.nim
index 56a0b2ba..27992215 100644
--- a/src/render/rendertext.nim
+++ b/src/render/rendertext.nim
@@ -5,69 +5,29 @@ import std/unicode
 import types/cell
 import utils/strwidth
 
-import chakasu/charset
-import chakasu/decoderstream
-import chakasu/encoderstream
-
-type StreamRenderer* = object
+type StreamRenderer* = ref object
   ansiparser: AnsiCodeParser
   format: Format
   af: bool
   stream: Stream
-  decoder: DecoderStream
-  encoder: EncoderStream
-  charsets: seq[Charset]
   newline: bool
   w: int
   j: int # byte in line
 
-#TODO pass bool for whether we can rewind
-proc newStreamRenderer*(stream: Stream, charsets0: openArray[Charset]):
-    ref StreamRenderer =
-  var charsets = newSeq[Charset](charsets0.len)
-  for i in 0 ..< charsets.len:
-    charsets[i] = charsets0[charsets.high - i]
-  if charsets.len == 0:
-    charsets.add(DefaultCharset)
-  let cs = charsets.pop()
-  let em = if charsets.len > 0:
-    DECODER_ERROR_MODE_FATAL
-  else:
-    DECODER_ERROR_MODE_REPLACEMENT
-  let decoder = newDecoderStream(stream, cs, errormode = em)
-  decoder.setInhibitCheckEnd(true)
-  let encoder = newEncoderStream(decoder)
-  return (ref StreamRenderer)(
-    stream: stream,
-    decoder: decoder,
-    encoder: encoder,
-    format: Format(),
-    charsets: charsets,
-    ansiparser: AnsiCodeParser(
-      state: PARSE_DONE
-    )
-  )
+proc newStreamRenderer*(): StreamRenderer =
+  return StreamRenderer(ansiparser: AnsiCodeParser(state: PARSE_DONE))
 
-proc rewind(renderer: var StreamRenderer) =
-  let cs = renderer.charsets.pop()
-  let em = if renderer.charsets.len > 0:
-    DECODER_ERROR_MODE_FATAL
-  else:
-    DECODER_ERROR_MODE_REPLACEMENT
-  let decoder = newDecoderStream(renderer.stream, cs, errormode = em)
-  decoder.setInhibitCheckEnd(true)
-  renderer.decoder = decoder
-  renderer.encoder = newEncoderStream(decoder)
+proc rewind*(renderer: StreamRenderer) =
   renderer.format = Format()
   renderer.ansiparser.state = PARSE_DONE
 
-proc addFormat(grid: var FlexibleGrid, renderer: var StreamRenderer) =
+proc addFormat(grid: var FlexibleGrid, renderer: StreamRenderer) =
   if renderer.af:
     renderer.af = false
     if renderer.j == grid[^1].str.len:
       grid[^1].addFormat(renderer.w, renderer.format)
 
-proc processBackspace(grid: var FlexibleGrid, renderer: var StreamRenderer,
+proc processBackspace(grid: var FlexibleGrid, renderer: StreamRenderer,
     r: Rune): bool =
   let pj = renderer.j
   var cr: Rune
@@ -105,8 +65,7 @@ proc processBackspace(grid: var FlexibleGrid, renderer: var StreamRenderer,
   grid[^1].str.setLen(renderer.j)
   return false
 
-proc processAscii(grid: var FlexibleGrid, renderer: var StreamRenderer,
-    c: char) =
+proc processAscii(grid: var FlexibleGrid, renderer: StreamRenderer, c: char) =
   case c
   of '\b':
     if renderer.j == 0:
@@ -129,8 +88,10 @@ proc processAscii(grid: var FlexibleGrid, renderer: var StreamRenderer,
     renderer.w += Rune(c).twidth(renderer.w)
     inc renderer.j
 
-proc renderChunk(grid: var FlexibleGrid, renderer: var StreamRenderer,
-    buf: string) =
+proc renderChunk*(grid: var FlexibleGrid, renderer: StreamRenderer,
+    buf: openArray[char]) =
+  if grid.len == 0:
+    grid.addLine()
   var i = 0
   while i < buf.len:
     if renderer.newline:
@@ -158,21 +119,3 @@ proc renderChunk(grid: var FlexibleGrid, renderer: var StreamRenderer,
       grid[^1].str &= r
       renderer.w += r.twidth(renderer.w)
       renderer.j += i - pi
-
-proc renderStream*(grid: var FlexibleGrid, renderer: var StreamRenderer): bool =
-  let buf = renderer.encoder.readAll()
-  if renderer.decoder.failed:
-    renderer.rewind()
-    grid.setLen(0)
-    return false
-  if grid.len == 0:
-    grid.addLine()
-  grid.renderChunk(renderer, buf)
-  return true
-
-proc finishRender*(grid: var FlexibleGrid, renderer: var StreamRenderer) =
-  renderer.decoder.setInhibitCheckEnd(false)
-  let buf = renderer.decoder.readAll()
-  if grid.len == 0:
-    grid.addLine()
-  grid.renderChunk(renderer, buf)
diff --git a/src/server/buffer.nim b/src/server/buffer.nim
index 27308ff3..788c0aea 100644
--- a/src/server/buffer.nim
+++ b/src/server/buffer.nim
@@ -51,7 +51,10 @@ import utils/strwidth
 import utils/twtstr
 import xhr/formdata as formdata_impl
 
-import chakasu/charset
+from chagashi/decoder import newTextDecoder
+import chagashi/charset
+import chagashi/decodercore
+import chagashi/validatorcore
 
 import chame/tags
 
@@ -103,7 +106,6 @@ type
     prevstyled: StyledNode
     selector: Selector[int]
     istream: SocketStream
-    sstream: StringStream
     available: int
     state: BufferState
     prevnode: StyledNode
@@ -118,8 +120,15 @@ type
     quirkstyle: CSSStylesheet
     userstyle: CSSStylesheet
     htmlParser: HTML5ParserWrapper
-    srenderer: ref StreamRenderer
+    srenderer: StreamRenderer
     bgcolor: CellColor
+    needsBOMSniff: bool
+    seekable: bool
+    decoder: TextDecoder
+    validator: ref TextValidatorUTF8
+    validateBuf: seq[char]
+    charsetStack: seq[Charset]
+    charset: Charset
 
   InterfaceOpaque = ref object
     stream: Stream
@@ -274,11 +283,6 @@ macro task(fun: typed) =
   pfun.istask = true
   fun
 
-func charsets(buffer: Buffer): seq[Charset] =
-  if buffer.source.charset != CHARSET_UNKNOWN:
-    return @[buffer.source.charset]
-  return buffer.config.charsets
-
 func getTitleAttr(node: StyledNode): string =
   if node == nil:
     return ""
@@ -632,13 +636,115 @@ proc do_reshape(buffer: Buffer) =
     buffer.lines.renderDocument(buffer.bgcolor, styledRoot, buffer.attrs)
     buffer.prevstyled = styledRoot
 
-proc processData(buffer: Buffer): bool =
+proc processData0(buffer: Buffer, data: openArray[char]): bool = # feeds one chunk to the HTML parser or the text renderer
   if buffer.ishtml:
-    let res = buffer.htmlParser.parseAll()
+    if buffer.htmlParser.parseBuffer(data) == PRES_STOP: # parser stopped on this chunk
+      buffer.charsetStack = @[buffer.htmlParser.builder.charset] # builder's charset becomes the only candidate
+      return false # false tells the caller the chunk was not accepted
     buffer.document = buffer.htmlParser.builder.document
-    return res
   else:
-    return buffer.lines.renderStream(buffer.srenderer[])
+    buffer.lines.renderChunk(buffer.srenderer, data) # plain text goes straight into the line grid
+  true # reached unless the parser returned PRES_STOP
+
+func canSwitch(buffer: Buffer): bool {.inline.} =
+  # HTML needs a still-tentative parser; everyone needs a queued charset.
+  (not buffer.ishtml or buffer.htmlParser.builder.confidence == ccTentative) and
+    buffer.charsetStack.len > 0
+
+proc initDecoder(buffer: Buffer) =
+  # Keep exactly one of decoder/validator live; a stale one misroutes input.
+  let utf8 = buffer.charset == CHARSET_UTF_8
+  buffer.decoder = if not utf8: newTextDecoder(buffer.charset) else: nil
+  buffer.validator = if utf8: (ref TextValidatorUTF8)() else: nil
+
+proc switchCharset(buffer: Buffer) =
+  buffer.charset = buffer.charsetStack.pop() # take the next queued candidate
+  buffer.initDecoder()
+  if not buffer.ishtml:
+    buffer.srenderer.rewind()
+    buffer.lines.setLen(0) # discard lines rendered with the old charset
+  else:
+    buffer.htmlParser.restart(buffer.charset)
+
+const BufferSize = 16384
+
+proc decodeData(buffer: Buffer, iq: openArray[uint8]): bool = # returns false after switching charset
+  var oq {.noinit.}: array[BufferSize, char] # scratch output buffer handed to the decoder
+  var n = 0 # updated by decode(); used below as the number of bytes to take from oq
+  while true:
+    case buffer.decoder.decode(iq, oq.toOpenArrayByte(0, oq.high), n)
+    of tdrDone: # presumably all of iq has been consumed - confirm against chagashi
+      if not buffer.processData0(oq.toOpenArray(0, n - 1)):
+        assert buffer.canSwitch
+        buffer.switchCharset()
+        return false
+      break
+    of tdrReqOutput:
+      # decoder needs more output room: pass oq[0..n-1] on, then refill from 0
+      if not buffer.processData0(oq.toOpenArray(0, n - 1)):
+        assert buffer.canSwitch
+        buffer.switchCharset()
+        return false
+      n = 0
+    of tdrError:
+      if buffer.canSwitch: # retry with the next charset instead of substituting
+        buffer.switchCharset()
+        return false
+      doAssert buffer.processData0("\uFFFD") # NOTE(review): oq[0..n-1] is not flushed before U+FFFD - confirm ordering
+  true
+
+proc validateData(buffer: Buffer, iq: openArray[char]): bool = # returns false after switching charset
+  var pi = 0 # start of the part of iq not yet passed to processData0
+  var n = 0 # set by validate(); used below as an index into iq
+  while true:
+    case buffer.validator[].validate(iq.toOpenArrayByte(0, iq.high), n)
+    of tvrDone:
+      if n == -1: # NOTE(review): iq is neither forwarded nor saved in validateBuf here - confirm no bytes are lost
+        return true
+      if buffer.validateBuf.len > 0: # flush the tail carried over from an earlier call
+        doAssert buffer.processData0(buffer.validateBuf)
+        buffer.validateBuf.setLen(0)
+      if not buffer.processData0(iq.toOpenArray(pi, n)): # forward iq[pi..n]
+        assert buffer.canSwitch
+        buffer.switchCharset()
+        return false
+      buffer.validateBuf.add(iq.toOpenArray(n + 1, iq.high)) # keep iq[n+1..] for the next call
+      break
+    of tvrError:
+      buffer.validateBuf.setLen(0) # any carried-over tail is dropped on error
+      if buffer.canSwitch:
+        buffer.switchCharset()
+        return false
+      if n > pi:
+        doAssert buffer.processData0(iq.toOpenArray(pi, n - 1)) # forward iq[pi..n-1] ahead of the replacement
+      doAssert buffer.processData0("\uFFFD") # substitute U+FFFD for the rejected input
+      pi = buffer.validator.i # resume from the validator's `i` field (presumably its position in iq - confirm)
+  true
+
+proc bomSniff(buffer: Buffer, iq: openArray[char]): int =
+  # Returns the BOM length to skip, or 0; reads up to iq[2] without a guard.
+  let lead = iq[0]
+  result = 2
+  if lead == '\xFE' and iq[1] == '\xFF':
+    buffer.charsetStack = @[CHARSET_UTF_16_BE]
+  elif lead == '\xFF' and iq[1] == '\xFE':
+    buffer.charsetStack = @[CHARSET_UTF_16_LE]
+  elif lead == '\xEF' and iq[1] == '\xBB' and iq[2] == '\xBF':
+    buffer.charsetStack = @[CHARSET_UTF_8]
+    result = 3
+  else:
+    return 0
+  buffer.switchCharset()
+
+proc processData(buffer: Buffer, iq: openArray[char]): bool =
+  # Sniff a BOM once, on the first chunk; bomSniff returns how much to skip.
+  let sniff = buffer.needsBOMSniff and iq.len >= 3 # TODO: shorter first chunk
+  let first = if sniff: buffer.bomSniff(iq) else: 0
+  buffer.needsBOMSniff = false
+  if buffer.decoder == nil:
+    buffer.validateData(iq.toOpenArray(first, iq.high))
+  else:
+    buffer.decodeData(iq.toOpenArrayByte(first, iq.high))
 
 proc windowChange*(buffer: Buffer, attrs: WindowAttributes) {.proxy.} =
   buffer.attrs = attrs
@@ -717,44 +823,38 @@ proc rewind(buffer: Buffer): bool =
 
 proc setHTML(buffer: Buffer, ishtml: bool) =
   buffer.ishtml = ishtml
+  buffer.charset = buffer.charsetStack.pop() # first candidate charset; pop() needs a non-empty stack
+  buffer.initDecoder()
   if ishtml:
     let factory = newCAtomFactory()
     buffer.factory = factory
-    if buffer.config.scripting:
-      buffer.window = newWindow(
-        buffer.config.scripting,
-        buffer.config.images,
-        buffer.selector,
-        buffer.attrs,
-        factory,
-        proc(url: URL) = buffer.navigate(url),
-        some(buffer.loader)
-      )
+    let navigate = if buffer.config.scripting: # navigation callback only when scripting is enabled
+      proc(url: URL) = buffer.navigate(url)
     else:
-      buffer.window = newWindow(
-        buffer.config.scripting,
-        buffer.config.images,
-        buffer.selector,
-        buffer.attrs,
-        factory,
-        nil,
-        some(buffer.loader)
-      )
+      nil
+    buffer.window = newWindow( # single construction site; only `navigate` differs by config
+      buffer.config.scripting,
+      buffer.config.images,
+      buffer.selector,
+      buffer.attrs,
+      factory,
+      navigate,
+      some(buffer.loader)
+    )
     buffer.htmlParser = newHTML5ParserWrapper(
-      buffer.sstream,
       buffer.window,
       buffer.url,
       buffer.factory,
-      buffer.charsets,
-      seekable = true
+      buffer.charset # the parser now gets one charset; fallbacks stay in charsetStack
     )
+    assert buffer.htmlParser.builder.document != nil # the wrapper is expected to create the document
     const css = staticRead"res/ua.css"
     const quirk = css & staticRead"res/quirk.css"
     buffer.uastyle = css.parseStylesheet(factory)
     buffer.quirkstyle = quirk.parseStylesheet(factory)
     buffer.userstyle = parseStylesheet(buffer.config.userstyle, factory)
   else:
-    buffer.srenderer = newStreamRenderer(buffer.sstream, buffer.charsets)
+    buffer.srenderer = newStreamRenderer() # renderer no longer owns a stream; decoded chunks are pushed to it
 
 proc connect*(buffer: Buffer): ConnectResult {.proxy.} =
   if buffer.connected:
@@ -1025,13 +1125,14 @@ proc dispatchEvent(buffer: Buffer, ctype: string, elem: Element): tuple[
       break
   return (called, canceled)
 
-const BufferSize = 16384
-
 proc finishLoad(buffer: Buffer): EmptyPromise =
   if buffer.state != LOADING_PAGE:
     let p = EmptyPromise()
     p.resolve()
     return p
+  if buffer.decoder != nil and buffer.decoder.finish() == tdfrError or
+      buffer.validator != nil and buffer.validator[].finish() == tvrError:
+    doAssert buffer.processData0("\uFFFD")
   var p: EmptyPromise
   if buffer.ishtml:
     buffer.htmlParser.finish()
@@ -1083,20 +1184,16 @@ proc onload(buffer: Buffer) =
   of LOADING_PAGE:
     discard
   var reprocess = false
+  var iq {.noinit.}: array[BufferSize, char]
+  var n = 0
   while true:
-    buffer.sstream.setPosition(0)
-    if not reprocess:
-      buffer.sstream.data.setLen(BufferSize)
     try:
-      var n = 0
       if not reprocess:
-        buffer.sstream.data.prepareMutation()
-        n = buffer.istream.recvData(addr buffer.sstream.data[0], BufferSize)
-        if n != buffer.sstream.data.len:
-          buffer.sstream.data.setLen(n)
-      if n != 0 or reprocess:
+        n = buffer.istream.recvData(addr iq[0], iq.len)
         buffer.available += n
-        if not buffer.processData():
+      res.lines = buffer.lines.len
+      if n != 0:
+        if not buffer.processData(iq.toOpenArray(0, n - 1)):
           if not buffer.firstBufferRead:
             reprocess = true
             continue
@@ -1105,10 +1202,8 @@ proc onload(buffer: Buffer) =
         buffer.firstBufferRead = true
         reprocess = false
         res.bytes = buffer.available
-      res.lines = buffer.lines.len
-      if buffer.istream.atEnd():
-        buffer.sstream = nil
-        # EOF
+        res.lines = buffer.lines.len
+      else: # EOF
         res.atend = true
         buffer.finishLoad().then(proc() =
           buffer.do_reshape()
@@ -1116,8 +1211,6 @@ proc onload(buffer: Buffer) =
           buffer.state = LOADED
           if buffer.document != nil: # may be nil if not buffer.ishtml
             buffer.document.readyState = READY_STATE_COMPLETE
-          if not buffer.ishtml:
-            buffer.lines.finishRender(buffer.srenderer[])
           buffer.dispatchLoadEvent()
           buffer.resolveTask(LOAD, res)
         )
@@ -1754,13 +1847,18 @@ proc launchBuffer*(config: BufferConfig, source: BufferSource,
     config: config,
     loader: loader,
     source: source,
-    sstream: newStringStream(),
     selector: newSelector[int](),
     estream: newFileStream(stderr),
     pstream: socks,
     rfd: socks.fd,
-    ssock: ssock
+    ssock: ssock,
+    needsBOMSniff: true,
+    seekable: true
   )
+  for i in countdown(buffer.config.charsets.high, 0):
+    buffer.charsetStack.add(buffer.config.charsets[i])
+  if buffer.charsetStack.len == 0:
+    buffer.charsetStack.add(DefaultCharset)
   gbuffer = buffer
   onSignal SIGTERM:
     discard sig
diff --git a/src/version.nim b/src/version.nim
index b2862180..1ac9b191 100644
--- a/src/version.nim
+++ b/src/version.nim
@@ -23,9 +23,9 @@ macro checkVersion(xs: static string, major, minor, patch: int) =
         gs & ").\n" &
         "Please run `make submodule` to update.")
 
-tryImport chakasu/version, "chakasu"
+tryImport chagashi/version, "chagashi"
 tryImport chame/version, "chame"
 
 static:
-  checkVersion("chakasu", 0, 3, 2)
+  checkVersion("chagashi", 0, 4, 0)
   checkVersion("chame", 0, 14, 3)