about summary refs log tree commit diff stats
path: root/src/server
diff options
context:
space:
mode:
author: bptato <nincsnevem662@gmail.com> 2024-02-22 20:14:08 +0100
committer: bptato <nincsnevem662@gmail.com> 2024-02-22 20:14:19 +0100
commit: 78ffc938fa7e4baad0a55625026b765d215be1aa (patch)
tree: 857db2e963efb13933d1e7954e9d4dc657b6f11d /src/server
parent: def15ede4fbd686b0ee9b193f41b2a47190aa43a (diff)
download: chawan-78ffc938fa7e4baad0a55625026b765d215be1aa.tar.gz
Replace Chakasu with Chagashi
The API is horrid :( but at least it copies less.

TODO: think of a better API.
Diffstat (limited to 'src/server')
-rw-r--r-- src/server/buffer.nim 208
1 files changed, 153 insertions, 55 deletions
diff --git a/src/server/buffer.nim b/src/server/buffer.nim
index 27308ff3..788c0aea 100644
--- a/src/server/buffer.nim
+++ b/src/server/buffer.nim
@@ -51,7 +51,10 @@ import utils/strwidth
 import utils/twtstr
 import xhr/formdata as formdata_impl
 
-import chakasu/charset
+from chagashi/decoder import newTextDecoder
+import chagashi/charset
+import chagashi/decodercore
+import chagashi/validatorcore
 
 import chame/tags
 
@@ -103,7 +106,6 @@ type
     prevstyled: StyledNode
     selector: Selector[int]
     istream: SocketStream
-    sstream: StringStream
     available: int
     state: BufferState
     prevnode: StyledNode
@@ -118,8 +120,15 @@ type
     quirkstyle: CSSStylesheet
     userstyle: CSSStylesheet
     htmlParser: HTML5ParserWrapper
-    srenderer: ref StreamRenderer
+    srenderer: StreamRenderer
     bgcolor: CellColor
+    needsBOMSniff: bool
+    seekable: bool
+    decoder: TextDecoder
+    validator: ref TextValidatorUTF8
+    validateBuf: seq[char]
+    charsetStack: seq[Charset]
+    charset: Charset
 
   InterfaceOpaque = ref object
     stream: Stream
@@ -274,11 +283,6 @@ macro task(fun: typed) =
   pfun.istask = true
   fun
 
-func charsets(buffer: Buffer): seq[Charset] =
-  if buffer.source.charset != CHARSET_UNKNOWN:
-    return @[buffer.source.charset]
-  return buffer.config.charsets
-
 func getTitleAttr(node: StyledNode): string =
   if node == nil:
     return ""
@@ -632,13 +636,115 @@ proc do_reshape(buffer: Buffer) =
     buffer.lines.renderDocument(buffer.bgcolor, styledRoot, buffer.attrs)
     buffer.prevstyled = styledRoot
 
-proc processData(buffer: Buffer): bool =
+proc processData0(buffer: Buffer, data: openArray[char]): bool =
+  ## Hands one chunk of `data` to the buffer's consumer: the HTML parser when
+  ## `buffer.ishtml` is set, the stream renderer otherwise.
+  ##
+  ## Returns false only when the HTML parser answers `PRES_STOP`; in that case
+  ## `charsetStack` is replaced by the single charset reported by the parser's
+  ## builder. Returns true in every other case.
   if buffer.ishtml:
-    let res = buffer.htmlParser.parseAll()
+    if buffer.htmlParser.parseBuffer(data) == PRES_STOP:
+      # The builder's charset becomes the only queued candidate.
+      buffer.charsetStack = @[buffer.htmlParser.builder.charset]
+      return false
     buffer.document = buffer.htmlParser.builder.document
-    return res
   else:
-    return buffer.lines.renderStream(buffer.srenderer[])
+    buffer.lines.renderChunk(buffer.srenderer, data)
+  true
+
+func canSwitch(buffer: Buffer): bool {.inline.} =
+  ## Tells whether the buffer may still fall back to another charset: there
+  ## must be a candidate left on `charsetStack`, and for HTML the parser's
+  ## builder confidence must still be `ccTentative`.
+  let pinned = buffer.ishtml and
+    buffer.htmlParser.builder.confidence != ccTentative
+  not pinned and buffer.charsetStack.len > 0
+
+proc initDecoder(buffer: Buffer) =
+  ## Sets up the input pipeline for `buffer.charset`: UTF-8 gets a fresh
+  ## `TextValidatorUTF8`, every other charset a decoder from `newTextDecoder`.
+  ##
+  ## Exactly one of `buffer.decoder` / `buffer.validator` is non-nil on return.
+  ## `processData` selects its path with `buffer.decoder != nil`, so a decoder
+  ## left over from a previous charset must be dropped when switching to UTF-8
+  ## (e.g. after a UTF-8 BOM was sniffed); otherwise the stale decoder would
+  ## keep being used.
+  if buffer.charset != CHARSET_UTF_8:
+    buffer.decoder = newTextDecoder(buffer.charset)
+    buffer.validator = nil
+  else:
+    buffer.validator = (ref TextValidatorUTF8)()
+    buffer.decoder = nil
+
+proc switchCharset(buffer: Buffer) =
+  ## Takes the next candidate off `charsetStack`, makes it the active charset,
+  ## re-initializes decoding for it and resets the consumer so the input can
+  ## be processed again: the HTML parser is restarted with the new charset,
+  ## while for non-HTML the stream renderer is rewound and the rendered lines
+  ## are dropped.
+  let next = buffer.charsetStack.pop()
+  buffer.charset = next
+  buffer.initDecoder()
+  if not buffer.ishtml:
+    buffer.srenderer.rewind()
+    buffer.lines.setLen(0)
+  else:
+    buffer.htmlParser.restart(buffer.charset)
+
+# Length of the fixed-size scratch arrays declared with it (e.g. `oq` below).
+const BufferSize = 16384
+
+proc decodeData(buffer: Buffer, iq: openArray[uint8]): bool =
+  ## Runs `iq` through `buffer.decoder` and forwards the contents of the
+  ## output scratch array to `processData0`.
+  ##
+  ## Returns true once the decoder reports `tdrDone` and the output was
+  ## accepted. Returns false after calling `switchCharset`, which happens when
+  ## `processData0` rejects the output or when the decoder reports an error
+  ## while `canSwitch` holds.
+  var oq {.noinit.}: array[BufferSize, char]
+  # Passed to decode(); used below as the number of bytes currently in `oq`.
+  var n = 0
+  while true:
+    case buffer.decoder.decode(iq, oq.toOpenArrayByte(0, oq.high), n)
+    of tdrDone:
+      if not buffer.processData0(oq.toOpenArray(0, n - 1)):
+        assert buffer.canSwitch
+        buffer.switchCharset()
+        return false
+      break
+    of tdrReqOutput:
+      # flush output buffer
+      if not buffer.processData0(oq.toOpenArray(0, n - 1)):
+        assert buffer.canSwitch
+        buffer.switchCharset()
+        return false
+      n = 0
+    of tdrError:
+      if buffer.canSwitch:
+        buffer.switchCharset()
+        return false
+      # No fallback charset left: emit U+FFFD and keep decoding.
+      # NOTE(review): oq[0 ..< n] is neither flushed nor is `n` reset before
+      # the replacement character is emitted, whereas the tvrError branch of
+      # validateData flushes the pending input first - confirm against the
+      # decoder's contract that output cannot end up reordered here.
+      doAssert buffer.processData0("\uFFFD")
+  true
+
+proc validateData(buffer: Buffer, iq: openArray[char]): bool =
+  ## UTF-8 path: checks `iq` with `buffer.validator` and hands slices of `iq`
+  ## to `processData0` unchanged, emitting U+FFFD where the validator reports
+  ## an error and no charset switch is possible.
+  ##
+  ## The tail `iq[n + 1 .. iq.high]` left after a `tvrDone` result is stored in
+  ## `buffer.validateBuf` and flushed ahead of the next validated slice.
+  ##
+  ## Returns false after calling `switchCharset`; true otherwise.
+  var pi = 0 # start of the part of `iq` not yet handed to processData0
+  var n = 0 # index into `iq` filled in by validate()
+  while true:
+    case buffer.validator[].validate(iq.toOpenArrayByte(0, iq.high), n)
+    of tvrDone:
+      if n == -1:
+        # NOTE(review): returns without forwarding `iq` or storing it in
+        # validateBuf - presumably -1 means nothing in this chunk is ready to
+        # be emitted yet; confirm these bytes are not lost.
+        return true
+      if buffer.validateBuf.len > 0:
+        # NOTE(review): processData0 returns false when the HTML parser
+        # answers PRES_STOP, which would trip this doAssert - verify.
+        doAssert buffer.processData0(buffer.validateBuf)
+        buffer.validateBuf.setLen(0)
+      if not buffer.processData0(iq.toOpenArray(pi, n)):
+        assert buffer.canSwitch
+        buffer.switchCharset()
+        return false
+      # Keep whatever follows index `n` for the next call.
+      buffer.validateBuf.add(iq.toOpenArray(n + 1, iq.high))
+      break
+    of tvrError:
+      buffer.validateBuf.setLen(0)
+      if buffer.canSwitch:
+        buffer.switchCharset()
+        return false
+      # No fallback charset left: flush what precedes the error, emit U+FFFD,
+      # then continue from the validator's current position.
+      if n > pi:
+        doAssert buffer.processData0(iq.toOpenArray(pi, n - 1))
+      doAssert buffer.processData0("\uFFFD")
+      pi = buffer.validator.i
+  true
+
+proc bomSniff(buffer: Buffer, iq: openArray[char]): int =
+  ## Looks for a byte order mark at the start of `iq`.
+  ##
+  ## When one is found, `charsetStack` is replaced by the matching charset
+  ## (UTF-16BE for FE FF, UTF-16LE for FF FE, UTF-8 for EF BB BF) and
+  ## `switchCharset` is called. Returns the length of the mark in bytes, or 0
+  ## when `iq` does not start with one.
+  ##
+  ## Input shorter than a mark is treated as "no mark" instead of indexing
+  ## past the end of `iq`.
+  if iq.len >= 2:
+    if iq[0] == '\xFE' and iq[1] == '\xFF':
+      buffer.charsetStack = @[CHARSET_UTF_16_BE]
+      buffer.switchCharset()
+      return 2
+    if iq[0] == '\xFF' and iq[1] == '\xFE':
+      buffer.charsetStack = @[CHARSET_UTF_16_LE]
+      buffer.switchCharset()
+      return 2
+  if iq.len >= 3 and iq[0] == '\xEF' and iq[1] == '\xBB' and iq[2] == '\xBF':
+    buffer.charsetStack = @[CHARSET_UTF_8]
+    buffer.switchCharset()
+    return 3
+  return 0
+
+proc processData(buffer: Buffer, iq: openArray[char]): bool =
+  ## Entry point for one chunk of raw input. While `needsBOMSniff` is set, a
+  ## chunk of at least three bytes is checked for a byte order mark, which is
+  ## then skipped; the flag is cleared afterwards regardless of the chunk
+  ## length. The rest of the chunk goes through `decodeData` when a decoder is
+  ## installed and through `validateData` otherwise; their result is returned.
+  let skip =
+    if buffer.needsBOMSniff and iq.len >= 3: # ehm... TODO
+      buffer.bomSniff(iq)
+    else:
+      0
+  buffer.needsBOMSniff = false
+  if buffer.decoder == nil:
+    return buffer.validateData(iq.toOpenArray(skip, iq.high))
+  buffer.decodeData(iq.toOpenArrayByte(skip, iq.high))
 
 proc windowChange*(buffer: Buffer, attrs: WindowAttributes) {.proxy.} =
   buffer.attrs = attrs
@@ -717,44 +823,38 @@ proc rewind(buffer: Buffer): bool =
 
 proc setHTML(buffer: Buffer, ishtml: bool) =
+  ## Records whether the buffer holds HTML, makes the charset popped from
+  ## `charsetStack` the active one and initializes decoding for it. For HTML
+  ## it then creates the window, the HTML5 parser and the UA, quirks-mode and
+  ## user stylesheets; otherwise it creates a fresh stream renderer.
   buffer.ishtml = ishtml
+  # pop() needs a non-empty stack - assumes the stack was filled beforehand.
+  buffer.charset = buffer.charsetStack.pop()
+  buffer.initDecoder()
   if ishtml:
     let factory = newCAtomFactory()
     buffer.factory = factory
-    if buffer.config.scripting:
-      buffer.window = newWindow(
-        buffer.config.scripting,
-        buffer.config.images,
-        buffer.selector,
-        buffer.attrs,
-        factory,
-        proc(url: URL) = buffer.navigate(url),
-        some(buffer.loader)
-      )
+    # A navigation callback is only passed on when scripting is enabled.
+    let navigate = if buffer.config.scripting:
+      proc(url: URL) = buffer.navigate(url)
     else:
-      buffer.window = newWindow(
-        buffer.config.scripting,
-        buffer.config.images,
-        buffer.selector,
-        buffer.attrs,
-        factory,
-        nil,
-        some(buffer.loader)
-      )
+      nil
+    buffer.window = newWindow(
+      buffer.config.scripting,
+      buffer.config.images,
+      buffer.selector,
+      buffer.attrs,
+      factory,
+      navigate,
+      some(buffer.loader)
+    )
     buffer.htmlParser = newHTML5ParserWrapper(
-      buffer.sstream,
       buffer.window,
       buffer.url,
       buffer.factory,
-      buffer.charsets,
-      seekable = true
+      buffer.charset
     )
+    assert buffer.htmlParser.builder.document != nil
     const css = staticRead"res/ua.css"
     const quirk = css & staticRead"res/quirk.css"
     buffer.uastyle = css.parseStylesheet(factory)
     buffer.quirkstyle = quirk.parseStylesheet(factory)
     buffer.userstyle = parseStylesheet(buffer.config.userstyle, factory)
   else:
-    buffer.srenderer = newStreamRenderer(buffer.sstream, buffer.charsets)
+    buffer.srenderer = newStreamRenderer()
 
 proc connect*(buffer: Buffer): ConnectResult {.proxy.} =
   if buffer.connected:
@@ -1025,13 +1125,14 @@ proc dispatchEvent(buffer: Buffer, ctype: string, elem: Element): tuple[
       break
   return (called, canceled)
 
-const BufferSize = 16384
-
 proc finishLoad(buffer: Buffer): EmptyPromise =
   if buffer.state != LOADING_PAGE:
     let p = EmptyPromise()
     p.resolve()
     return p
+  if buffer.decoder != nil and buffer.decoder.finish() == tdfrError or
+      buffer.validator != nil and buffer.validator[].finish() == tvrError:
+    doAssert buffer.processData0("\uFFFD")
   var p: EmptyPromise
   if buffer.ishtml:
     buffer.htmlParser.finish()
@@ -1083,20 +1184,16 @@ proc onload(buffer: Buffer) =
   of LOADING_PAGE:
     discard
   var reprocess = false
+  var iq {.noinit.}: array[BufferSize, char]
+  var n = 0
   while true:
-    buffer.sstream.setPosition(0)
-    if not reprocess:
-      buffer.sstream.data.setLen(BufferSize)
     try:
-      var n = 0
       if not reprocess:
-        buffer.sstream.data.prepareMutation()
-        n = buffer.istream.recvData(addr buffer.sstream.data[0], BufferSize)
-        if n != buffer.sstream.data.len:
-          buffer.sstream.data.setLen(n)
-      if n != 0 or reprocess:
+        n = buffer.istream.recvData(addr iq[0], iq.len)
         buffer.available += n
-        if not buffer.processData():
+      res.lines = buffer.lines.len
+      if n != 0:
+        if not buffer.processData(iq.toOpenArray(0, n - 1)):
           if not buffer.firstBufferRead:
             reprocess = true
             continue
@@ -1105,10 +1202,8 @@ proc onload(buffer: Buffer) =
         buffer.firstBufferRead = true
         reprocess = false
         res.bytes = buffer.available
-      res.lines = buffer.lines.len
-      if buffer.istream.atEnd():
-        buffer.sstream = nil
-        # EOF
+        res.lines = buffer.lines.len
+      else: # EOF
         res.atend = true
         buffer.finishLoad().then(proc() =
           buffer.do_reshape()
@@ -1116,8 +1211,6 @@ proc onload(buffer: Buffer) =
           buffer.state = LOADED
           if buffer.document != nil: # may be nil if not buffer.ishtml
             buffer.document.readyState = READY_STATE_COMPLETE
-          if not buffer.ishtml:
-            buffer.lines.finishRender(buffer.srenderer[])
           buffer.dispatchLoadEvent()
           buffer.resolveTask(LOAD, res)
         )
@@ -1754,13 +1847,18 @@ proc launchBuffer*(config: BufferConfig, source: BufferSource,
     config: config,
     loader: loader,
     source: source,
-    sstream: newStringStream(),
     selector: newSelector[int](),
     estream: newFileStream(stderr),
     pstream: socks,
     rfd: socks.fd,
-    ssock: ssock
+    ssock: ssock,
+    needsBOMSniff: true,
+    seekable: true
   )
+  for i in countdown(buffer.config.charsets.high, 0):
+    buffer.charsetStack.add(buffer.config.charsets[i])
+  if buffer.charsetStack.len == 0:
+    buffer.charsetStack.add(DefaultCharset)
   gbuffer = buffer
   onSignal SIGTERM:
     discard sig