Update Chame, Chagashi

author: bptato <nincsnevem662@gmail.com> 2024-06-13 23:04:21 +0200
committer: bptato <nincsnevem662@gmail.com> 2024-06-13 23:24:53 +0200
commit: d90e456b80ee11a84eb2a29ce01794fa662eb7b9 (patch)
tree: 30d358ef3a5c50c6a0272a25f6aff9d150480c4a
parent: 17bf0e843084712d9a914868ec44896f48d9d13a (diff)
download: chawan-d90e456b80ee11a84eb2a29ce01794fa662eb7b9.tar.gz
9 files changed, 56 insertions, 159 deletions
diff --git a/lib/chagashi b/lib/chagashi
-Subproject b2eaf326b280e12e36bc3e9c80bc247458cf865
+Subproject bc61299cf73636214cbc3b59059b8606edac959
diff --git a/lib/chame b/lib/chame
-Subproject 0abadff0147d355aa8ddc15ff8a592e9df900e1
+Subproject 5836c2f412bd4323f45c664632836722a057e0c
diff --git a/src/html/dom.nim b/src/html/dom.nim
index 29a521c6..0a11791f 100644
--- a/src/html/dom.nim
+++ b/src/html/dom.nim
@@ -46,7 +46,6 @@ import utils/twtstr
 
 import chagashi/charset
 import chagashi/decoder
-import chagashi/validator
 
 import chame/tags
 
@@ -3589,10 +3588,8 @@ proc fetchClassicScript(element: HTMLScriptElement; url: URL;
     element.onComplete(ScriptResult(t: RESULT_NULL))
     return
   let s = response.body.recvAll()
-  let source = if cs in {CHARSET_UNKNOWN, CHARSET_UTF_8}:
-    s.toValidUTF8()
-  else:
-    newTextDecoder(cs).decodeAll(s)
+  let cs = if cs == CHARSET_UNKNOWN: CHARSET_UTF_8 else: cs
+  let source = s.decodeAll(cs)
   let script = window.jsctx.createClassicScript(source, url, options, false)
   element.onComplete(ScriptResult(t: RESULT_SCRIPT, script: script))
 
diff --git a/src/js/encoding.nim b/src/js/encoding.nim
index ccadce16..6c32ee3c 100644
--- a/src/js/encoding.nim
+++ b/src/js/encoding.nim
@@ -1,8 +1,6 @@
 import chagashi/charset
 import chagashi/decoder
 import chagashi/decodercore
-import chagashi/validator
-import chagashi/validatorcore
 import monoucha/javascript
 import monoucha/jserror
 import monoucha/jstypes
@@ -19,8 +17,6 @@ type
     doNotFlush: bool
     bomSeen: bool
     td: TextDecoder
-    tv: ref TextValidatorUTF8
-    validateBuf: seq[uint8]
 
 jsDestructor(JSTextDecoder)
 jsDestructor(JSTextEncoder)
@@ -37,8 +33,7 @@ func newJSTextDecoder(label = "utf-8", options = TextDecoderOptions()):
   return ok(JSTextDecoder(
     ignoreBOM: options.ignoreBOM,
     fatal: options.fatal,
-    td: if encoding != CHARSET_UTF_8: newTextDecoder(encoding) else: nil,
-    tv: if encoding == CHARSET_UTF_8: (ref TextValidatorUTF8)() else: nil,
+    td: newTextDecoder(encoding),
     encoding: encoding
   ))
 
@@ -82,64 +77,31 @@ proc decode0(this: JSTextDecoder; ctx: JSContext; input: JSArrayBufferView;
     len: 0,
     cap: BufferSize
   )
+  let td = this.td
   var i = 0
   let H = int(input.abuf.len) - 1
   template handle_error =
     if this.fatal:
       return errTypeError("Failed to decode string")
     oq.write("\uFFFD")
-    i = this.td.i
+    i = td.i
   while true:
-    case this.td.decode(input.abuf.p.toOpenArray(i, H),
+    case td.decode(input.abuf.p.toOpenArray(i, H),
       oq.p.toOpenArray(0, oq.cap - 1), oq.len)
     of tdrDone:
       if not stream:
-        case this.td.finish()
+        case td.finish()
         of tdfrDone: discard
         of tdfrError: handle_error
       break
+    of tdrReadInput:
+      oq.write(input.abuf.p.toOpenArray(i + td.pi, i + td.ri))
     of tdrError:
       handle_error
     of tdrReqOutput:
       oq.grow()
   return ok(JS_NewStringLen(ctx, cast[cstring](oq.p), csize_t(oq.len)))
 
-proc validate0(this: JSTextDecoder; ctx: JSContext; input: JSArrayBufferView;
-    stream: bool): JSResult[JSValue] =
-  # assume input is valid; do not allocate yet
-  var oq = Growbuf(p: nil, len: 0, cap: 0)
-  var i = 0
-  let H = int(input.abuf.len) - 1
-  var n = 0
-  template handle_error =
-    if this.fatal:
-      return errTypeError("Failed to decode string")
-    # write from previous error (or beginning) to the last valid char
-    oq.write(input.abuf.p.toOpenArray(i, n))
-    oq.write("\uFFFD")
-    this.validateBuf.setLen(0)
-    i = this.tv.i
-  while true:
-    case this.tv[].validate(input.abuf.p.toOpenArray(i, H), n)
-    of tvrDone:
-      break
-    of tvrError:
-      handle_error
-  if not stream:
-    case this.tv[].finish()
-    of tvrDone: discard
-    of tvrError: handle_error
-  if this.validateBuf.len > 0 and n > -1:
-    oq.write(this.validateBuf)
-    oq.write(input.abuf.p.toOpenArray(i, n))
-    this.validateBuf.setLen(0)
-  this.validateBuf.add(input.abuf.p.toOpenArray(n + 1, input.abuf.high))
-  if oq.len > 0:
-    assert oq.p != nil
-    return ok(JS_NewStringLen(ctx, cast[cstring](oq.p), csize_t(oq.len)))
-  assert oq.p == nil
-  return ok(JS_NewStringLen(ctx, cast[cstring](input.abuf.p), csize_t(n + 1)))
-
 type TextDecodeOptions = object of JSDict
   stream: bool
 
@@ -148,21 +110,12 @@ proc decode(ctx: JSContext; this: JSTextDecoder;
     input = none(JSArrayBufferView); options = TextDecodeOptions()):
     JSResult[JSValue] {.jsfunc.} =
   if not this.doNotFlush:
-    if this.td != nil:
-      this.td = newTextDecoder(this.encoding)
-    else:
-      assert this.tv != nil
-      this.tv = (ref TextValidatorUTF8)()
+    this.td = newTextDecoder(this.encoding)
     this.bomSeen = false
   if this.doNotFlush != options.stream:
     this.doNotFlush = options.stream
   if input.isSome:
-    if this.td != nil:
-      return this.decode0(ctx, input.get, options.stream)
-    else:
-      assert this.encoding == CHARSET_UTF_8
-      # just validate
-      return this.validate0(ctx, input.get, options.stream)
+    return this.decode0(ctx, input.get, options.stream)
   return ok(JS_NewString(ctx, ""))
 
 func jencoding(this: JSTextDecoder): string {.jsfget: "encoding".} =
diff --git a/src/loader/response.nim b/src/loader/response.nim
index ca300957..8ea17e64 100644
--- a/src/loader/response.nim
+++ b/src/loader/response.nim
@@ -3,7 +3,6 @@ import std/tables
 
 import chagashi/charset
 import chagashi/decoder
-import chagashi/validator
 import io/promise
 import io/socketstream
 import loader/headers
@@ -120,10 +119,7 @@ proc text*(response: Response): Promise[JSResult[string]] {.jsfunc.} =
     let charset = response.getCharset(CHARSET_UTF_8)
     #TODO this is inefficient
     # maybe add a JS type that turns a seq[char] into JS strings
-    if charset == CHARSET_UTF_8:
-      ok(s.toValidUTF8())
-    else:
-      ok(newTextDecoder(charset).decodeAll(s))
+    ok(s.decodeAll(charset))
   )
 
 proc blob*(response: Response): Promise[JSResult[Blob]] {.jsfunc.} =
diff --git a/src/local/lineedit.nim b/src/local/lineedit.nim
index 22fd3988..ba02e2ae 100644
--- a/src/local/lineedit.nim
+++ b/src/local/lineedit.nim
@@ -12,7 +12,6 @@ import utils/twtstr
 import utils/wordbreak
 
 import chagashi/charset
-import chagashi/validator
 import chagashi/decoder
 
 type
@@ -153,7 +152,7 @@ proc backspace(edit: LineEdit) {.jsfunc.} =
     edit.cursori -= len
     edit.cursorx -= r.width()
     edit.invalid = true
-
+ 
 proc write*(edit: LineEdit; s: string; cs: Charset): bool =
   if cs == CHARSET_UTF_8:
     if s.validateUTF8Surr() != -1:
@@ -169,7 +168,10 @@ proc write*(edit: LineEdit; s: string; cs: Charset): bool =
   return true
 
 proc write(edit: LineEdit; s: string): bool {.jsfunc.} =
-  edit.write(s, CHARSET_UTF_8)
+  if s.validateUTF8Surr() != -1:
+    return false
+  edit.insertCharseq(s)
+  return true
 
 proc delete(edit: LineEdit) {.jsfunc.} =
   if edit.cursori < edit.news.len:
@@ -311,22 +313,23 @@ proc nextHist(edit: LineEdit) {.jsfunc.} =
 proc windowChange*(edit: LineEdit; attrs: WindowAttributes) =
   edit.maxwidth = attrs.width - edit.promptw - 1
 
-proc readLine*(prompt, current: string; termwidth: int;
-    disallowed: set[char]; hide: bool; hist: LineHistory): LineEdit =
-  result = LineEdit(
+proc readLine*(prompt, current: string; termwidth: int; disallowed: set[char];
+    hide: bool; hist: LineHistory): LineEdit =
+  let promptw = prompt.width()
+  return LineEdit(
     prompt: prompt,
-    promptw: prompt.width(),
+    promptw: promptw,
     news: current,
     disallowed: disallowed,
     hide: hide,
-    invalid: true
+    invalid: true,
+    cursori: current.len,
+    cursorx: current.notwidth(),
+    # - 1, so that the cursor always has place
+    maxwidth: termwidth - promptw - 1,
+    hist: hist,
+    histindex: hist.lines.len
   )
-  result.cursori = result.news.len
-  result.cursorx = result.news.notwidth()
-  # - 1, so that the cursor always has place
-  result.maxwidth = termwidth - result.promptw - 1
-  result.hist = hist
-  result.histindex = result.hist.lines.len
 
 proc addLineEditModule*(ctx: JSContext) =
   ctx.registerType(LineEdit)
diff --git a/src/local/term.nim b/src/local/term.nim
index 38368515..7dd6d951 100644
--- a/src/local/term.nim
+++ b/src/local/term.nim
@@ -19,8 +19,8 @@ import utils/strwidth
 import utils/twtstr
 
 import chagashi/charset
+import chagashi/decoder
 import chagashi/encoder
-import chagashi/validator
 
 #TODO switch away from termcap...
 
diff --git a/src/server/buffer.nim b/src/server/buffer.nim
index 8b31fb64..e201b281 100644
--- a/src/server/buffer.nim
+++ b/src/server/buffer.nim
@@ -54,10 +54,9 @@ import types/winattrs
 import utils/strwidth
 import utils/twtstr
 
-from chagashi/decoder import newTextDecoder
 import chagashi/charset
+import chagashi/decoder
 import chagashi/decodercore
-import chagashi/validatorcore
 
 import chame/tags
 
@@ -116,9 +115,7 @@ type
     htmlParser: HTML5ParserWrapper
     bgcolor: CellColor
     needsBOMSniff: bool
-    decoder: TextDecoder
-    validator: ref TextValidatorUTF8
-    validateBuf: seq[char]
+    ctx: TextDecoderContext
     charsetStack: seq[Charset]
     charset: Charset
     cacheId: int
@@ -751,13 +748,10 @@ func canSwitch(buffer: Buffer): bool {.inline.} =
   return buffer.htmlParser.builder.confidence == ccTentative and
     buffer.charsetStack.len > 0
 
+const BufferSize = 16384
+
 proc initDecoder(buffer: Buffer) =
-  if buffer.charset != CHARSET_UTF_8:
-    buffer.validator = nil
-    buffer.decoder = newTextDecoder(buffer.charset)
-  else:
-    buffer.decoder = nil
-    buffer.validator = (ref TextValidatorUTF8)()
+  buffer.ctx = initTextDecoderContext(buffer.charset, demFatal, BufferSize)
 
 proc switchCharset(buffer: Buffer) =
   buffer.charset = buffer.charsetStack.pop()
@@ -766,57 +760,26 @@ proc switchCharset(buffer: Buffer) =
   buffer.document = buffer.htmlParser.builder.document
   buffer.prevStyled = nil
 
-const BufferSize = 16384
-
 proc decodeData(buffer: Buffer; iq: openArray[uint8]): bool =
-  var oq {.noinit.}: array[BufferSize, char]
-  var n = 0
-  while true:
-    case buffer.decoder.decode(iq, oq.toOpenArrayByte(0, oq.high), n)
-    of tdrDone:
-      if not buffer.processData0(oq.toOpenArray(0, n - 1)):
-        buffer.switchCharset()
-        return false
-      break
-    of tdrReqOutput:
-      # flush output buffer
-      if not buffer.processData0(oq.toOpenArray(0, n - 1)):
-        buffer.switchCharset()
-        return false
-      n = 0
-    of tdrError:
-      if buffer.canSwitch:
-        buffer.switchCharset()
-        return false
-      doAssert buffer.processData0("\uFFFD")
-  true
-
-proc validateData(buffer: Buffer; iq: openArray[char]): bool =
-  var pi = 0
-  var n = 0
-  while true:
-    case buffer.validator[].validate(iq.toOpenArrayByte(0, iq.high), n)
-    of tvrDone:
-      if n == -1:
-        return true
-      if buffer.validateBuf.len > 0:
-        doAssert buffer.processData0(buffer.validateBuf)
-        buffer.validateBuf.setLen(0)
-      if not buffer.processData0(iq.toOpenArray(pi, n)):
-        buffer.switchCharset()
-        return false
-      buffer.validateBuf.add(iq.toOpenArray(n + 1, iq.high))
-      break
-    of tvrError:
-      buffer.validateBuf.setLen(0)
-      if buffer.canSwitch:
-        buffer.switchCharset()
-        return false
-      if n >= pi:
-        doAssert buffer.processData0(iq.toOpenArray(pi, n))
-      doAssert buffer.processData0("\uFFFD")
-      pi = buffer.validator.i
-  true
+  ## Decodes `iq` through the buffer's decoder context and passes every
+  ## decoded chunk to `processData0`.
+  ##
+  ## Returns false, after calling `switchCharset`, when `processData0` rejects
+  ## a chunk or the decoder context reports a failure; returns true otherwise.
+  if not buffer.canSwitch():
+    # initDecoder creates the context with demFatal; once no charset switch is
+    # possible, change it to demReplacement.
+    buffer.ctx.errorMode = demReplacement
+  for chunk in buffer.ctx.decode(iq, finish = false):
+    if not buffer.processData0(chunk.toOpenArray()):
+      buffer.switchCharset()
+      return false
+  if buffer.ctx.failed:
+    # A false return has to mean that the charset was switched, exactly as it
+    # does for a rejected chunk above.
+    buffer.switchCharset()
+    return false
+  true
 
 proc bomSniff(buffer: Buffer; iq: openArray[char]): int =
   if iq[0] == '\xFE' and iq[1] == '\xFF':
@@ -839,9 +790,7 @@ proc processData(buffer: Buffer; iq: openArray[char]): bool =
     if iq.len >= 3: # ehm... TODO
       start += buffer.bomSniff(iq)
     buffer.needsBOMSniff = false
-  if buffer.decoder != nil:
-    return buffer.decodeData(iq.toOpenArrayByte(start, iq.high))
-  return buffer.validateData(iq.toOpenArray(start, iq.high))
+  return buffer.decodeData(iq.toOpenArrayByte(start, iq.high))
 
 proc windowChange*(buffer: Buffer; attrs: WindowAttributes) {.proxy.} =
   buffer.attrs = attrs
@@ -1162,8 +1111,7 @@ proc finishLoad(buffer: Buffer): EmptyPromise =
     p.resolve()
     return p
   buffer.state = bsLoadingResources
-  if buffer.decoder != nil and buffer.decoder.finish() == tdfrError or
-      buffer.validator != nil and buffer.validator[].finish() == tvrError:
+  if buffer.ctx.td != nil and buffer.ctx.td.finish() == tdfrError:
     doAssert buffer.processData0("\uFFFD")
   buffer.htmlParser.finish()
   buffer.document.readyState = rsInteractive
diff --git a/src/version.nim b/src/version.nim
index c1c90c54..ef58eb72 100644
--- a/src/version.nim
+++ b/src/version.nim
@@ -27,6 +27,6 @@ tryImport chame/version, "chame"
 tryImport monoucha/version, "monoucha"
 
 static:
-  checkVersion("chagashi", 0, 4, 2)
-  checkVersion("chame", 0, 14, 5)
+  checkVersion("chagashi", 0, 5, 1)
+  checkVersion("chame", 1, 0, 0)
   checkVersion("monoucha", 0, 1, 1)
author	bptato <nincsnevem662@gmail.com>	2024-06-13 23:04:21 +0200
committer	bptato <nincsnevem662@gmail.com>	2024-06-13 23:24:53 +0200
commit	d90e456b80ee11a84eb2a29ce01794fa662eb7b9 (patch)
tree	30d358ef3a5c50c6a0272a25f6aff9d150480c4a
parent	17bf0e843084712d9a914868ec44896f48d9d13a (diff)
download	chawan-d90e456b80ee11a84eb2a29ce01794fa662eb7b9.tar.gz