about summary refs log tree commit diff stats
path: root/src/js
diff options
context:
space:
mode:
author bptato <nincsnevem662@gmail.com> 2024-02-22 20:14:08 +0100
committer bptato <nincsnevem662@gmail.com> 2024-02-22 20:14:19 +0100
commit 78ffc938fa7e4baad0a55625026b765d215be1aa (patch)
tree 857db2e963efb13933d1e7954e9d4dc657b6f11d /src/js
parent def15ede4fbd686b0ee9b193f41b2a47190aa43a (diff)
download chawan-78ffc938fa7e4baad0a55625026b765d215be1aa.tar.gz
Replace Chakasu with Chagashi
The API is horrid :( but at least it copies less.

TODO: think of a better API.
Diffstat (limited to 'src/js')
-rw-r--r-- src/js/encoding.nim 197
-rw-r--r-- src/js/jstypes.nim 3
2 files changed, 147 insertions, 53 deletions
diff --git a/src/js/encoding.nim b/src/js/encoding.nim
index 0e6643fb..b2ef1d1b 100644
--- a/src/js/encoding.nim
+++ b/src/js/encoding.nim
@@ -1,98 +1,189 @@
 import std/streams
 
+import bindings/quickjs
 import js/error
 import js/javascript
 import js/jstypes
 
-import chakasu/charset
-import chakasu/decoderstream
-import chakasu/encoderstream
+import chagashi/charset
+import chagashi/decoder
+import chagashi/decodercore
+import chagashi/validator
+import chagashi/validatorcore
 
 type
-  TextEncoder = ref object
+  JSTextEncoder = ref object
 
-  TextDecoder = ref object
+  JSTextDecoder = ref object
     encoding: Charset
-    errorMode: DecoderErrorMode
     ignoreBOM {.jsget.}: bool
+    fatal {.jsget.}: bool
     doNotFlush: bool
     bomSeen: bool
-    decoder: DecoderStream
-    encoder: EncoderStream # to return the string to JS
-    istream: StringStream
+    td: TextDecoder
+    tv: ref TextValidatorUTF8
+    validateBuf: seq[uint8]
 
-jsDestructor(TextDecoder)
-jsDestructor(TextEncoder)
+jsDestructor(JSTextDecoder)
+jsDestructor(JSTextEncoder)
 
 type TextDecoderOptions = object of JSDict
   fatal: bool
   ignoreBOM: bool
 
-func newTextDecoder(label = "utf-8", options = TextDecoderOptions()):
-    JSResult[TextDecoder] {.jsctor.} =
-  let errorMode = if options.fatal:
-    DECODER_ERROR_MODE_FATAL
-  else:
-    DECODER_ERROR_MODE_REPLACEMENT
+func newJSTextDecoder(label = "utf-8", options = TextDecoderOptions()):
+    JSResult[JSTextDecoder] {.jsctor.} =
   let encoding = getCharset(label)
   if encoding in {CHARSET_UNKNOWN, CHARSET_REPLACEMENT}:
     return err(newRangeError("Invalid encoding label"))
-  return ok(TextDecoder(
-    errorMode: errorMode,
+  return ok(JSTextDecoder(
     ignoreBOM: options.ignoreBOM,
+    fatal: options.fatal,
+    td: if encoding != CHARSET_UTF_8: newTextDecoder(encoding) else: nil,
+    tv: if encoding == CHARSET_UTF_8: (ref TextValidatorUTF8)() else: nil,
     encoding: encoding
   ))
 
+type Growbuf = object
+  p: ptr UncheckedArray[uint8]
+  cap: int
+  len: int
+
+{.warning[Deprecated]: off.}:
+  proc `=destroy`(growbuf: var Growbuf) =
+    if growbuf.p != nil:
+      dealloc(growbuf.p)
+      growbuf.p = nil
+
+const BufferSize = 128
+proc grow(buf: var Growbuf) =
+  if buf.cap == 0:
+    buf.cap = BufferSize
+  else:
+    buf.cap *= 2
+  buf.p = cast[ptr UncheckedArray[uint8]](buf.p.realloc(buf.cap))
+
+proc write(buf: var Growbuf, s: openArray[uint8]) = # append s to buf
+  while buf.len + s.len > buf.cap: # one doubling may not fit a large s
+    buf.grow()
+  if s.len > 0: # s[0] would be out of bounds for an empty s
+    copyMem(addr buf.p[buf.len], unsafeAddr s[0], s.len)
+  buf.len += s.len
+
+proc write(buf: var Growbuf, s: string) = # append the bytes of s to buf
+  while buf.len + s.len > buf.cap: # one doubling may not fit a large s
+    buf.grow()
+  if s.len > 0: # s[0] would be out of bounds for an empty s
+    copyMem(addr buf.p[buf.len], unsafeAddr s[0], s.len)
+  buf.len += s.len
+
+proc decode0(this: JSTextDecoder, ctx: JSContext, input: JSArrayBufferView,
+    stream: bool): JSResult[JSValue] =
+  var oq = Growbuf(
+    p: cast[ptr UncheckedArray[uint8]](alloc(BufferSize)),
+    len: 0,
+    cap: BufferSize
+  )
+  var i = 0
+  let H = int(input.abuf.len) - 1
+  template handle_error =
+    if this.fatal:
+      return errTypeError("Failed to decode string")
+    oq.write("\uFFFD")
+    i = this.td.i
+  while true:
+    case this.td.decode(input.abuf.p.toOpenArray(i, H),
+      oq.p.toOpenArray(0, oq.cap - 1), oq.len)
+    of tdrDone:
+      if not stream:
+        case this.td.finish()
+        of tdfrDone: discard
+        of tdfrError: handle_error
+      break
+    of tdrError:
+      handle_error
+    of tdrReqOutput:
+      oq.grow()
+  return ok(JS_NewStringLen(ctx, cast[cstring](oq.p), csize_t(oq.len)))
+
+proc validate0(this: JSTextDecoder, ctx: JSContext, input: JSArrayBufferView,
+    stream: bool): JSResult[JSValue] =
+  # assume input is valid; do not allocate yet
+  var oq = Growbuf(p: nil, len: 0, cap: 0)
+  var i = 0
+  let H = int(input.abuf.len) - 1
+  var n = 0
+  template handle_error =
+    if this.fatal:
+      return errTypeError("Failed to decode string")
+    # write from previous error (or beginning) to the last valid char
+    oq.write(input.abuf.p.toOpenArray(i, n))
+    oq.write("\uFFFD")
+    this.validateBuf.setLen(0)
+    i = this.tv.i
+  while true:
+    case this.tv[].validate(input.abuf.p.toOpenArray(i, H), n)
+    of tvrDone:
+      break
+    of tvrError:
+      handle_error
+  if not stream:
+    case this.tv[].finish()
+    of tvrDone: discard
+    of tvrError: handle_error
+  if this.validateBuf.len > 0 and n > -1:
+    oq.write(this.validateBuf)
+    oq.write(input.abuf.p.toOpenArray(i, n))
+    this.validateBuf.setLen(0)
+  this.validateBuf.add(input.abuf.p.toOpenArray(n + 1, input.abuf.high))
+  if oq.len > 0:
+    assert oq.p != nil
+    return ok(JS_NewStringLen(ctx, cast[cstring](oq.p), csize_t(oq.len)))
+  assert oq.p == nil
+  return ok(JS_NewStringLen(ctx, cast[cstring](input.abuf.p), csize_t(n + 1)))
+
 type TextDecodeOptions = object of JSDict
   stream: bool
 
 #TODO AllowSharedBufferSource
-proc decode(this: TextDecoder, input = none(JSArrayBufferView),
-    options = TextDecodeOptions()): string {.jsfunc.} =
+proc decode(ctx: JSContext, this: JSTextDecoder, input = opt(JSArrayBufferView),
+    options = TextDecodeOptions()): JSResult[JSValue] {.jsfunc.} =
   if not this.doNotFlush:
-    if this.istream != nil:
-      this.istream.close()
-    if this.decoder != nil:
-      this.decoder.close()
-    if this.encoder != nil:
-      this.encoder.close()
-    this.istream = newStringStream()
-    this.decoder = newDecoderStream(this.istream, cs = this.encoding,
-      errormode = this.errorMode)
-    this.encoder = newEncoderStream(this.decoder, cs = CHARSET_UTF_8)
+    if this.td != nil:
+      this.td = newTextDecoder(this.encoding)
+    else:
+      assert this.tv != nil
+      this.tv = (ref TextValidatorUTF8)()
     this.bomSeen = false
   if this.doNotFlush != options.stream:
     this.doNotFlush = options.stream
-    this.decoder.setInhibitCheckEnd(options.stream)
   if input.isSome:
-    let input = input.get
-    let pos = this.istream.getPosition()
-    #TODO input offset?
-    this.istream.writeData(input.abuf.p, int(input.abuf.len))
-    this.istream.setPosition(pos)
-  #TODO this should return a JSString, so we do not needlessly re-encode
-  # the output. (Right now we do, implicitly through toJS.)
-  return this.encoder.readAll()
-
-func jencoding(this: TextDecoder): string {.jsfget: "encoding".} =
+    if this.td != nil:
+      return this.decode0(ctx, input.get, options.stream)
+    else:
+      assert this.encoding == CHARSET_UTF_8
+      # just validate
+      return this.validate0(ctx, input.get, options.stream)
+  return ok(JS_NewString(ctx, ""))
+
+func jencoding(this: JSTextDecoder): string {.jsfget: "encoding".} =
   return $this.encoding
 
-func fatal(this: TextDecoder): bool {.jsfget.} =
-  return this.errorMode == DECODER_ERROR_MODE_FATAL
-
-func newTextEncoder(): TextEncoder {.jsctor.} =
-  return TextEncoder()
+func newTextEncoder(): JSTextEncoder {.jsctor.} =
+  return JSTextEncoder()
 
-func jencoding(this: TextEncoder): string {.jsfget: "encoding".} =
+func jencoding(this: JSTextEncoder): string {.jsfget: "encoding".} =
   return "utf-8"
 
 proc dealloc_wrap(rt: JSRuntime, opaque, p: pointer) {.cdecl.} =
   dealloc(p)
 
-proc encode(this: TextEncoder, input = ""): JSUint8Array {.jsfunc.} =
-  # input is already UTF-8 here :P
+proc encode(this: JSTextEncoder, input = ""): JSUint8Array {.jsfunc.} =
+  # we have to validate input first :/
+  #TODO it is possible to do less copies here...
+  var input = input.toValidUTF8()
   let buf = cast[ptr UncheckedArray[uint8]](alloc(input.len))
-  copyMem(buf, unsafeAddr input[0], input.len)
+  copyMem(buf, addr input[0], input.len)
   let abuf = JSArrayBuffer(
     p: buf,
     len: csize_t(input.len),
@@ -107,5 +198,5 @@ proc encode(this: TextEncoder, input = ""): JSUint8Array {.jsfunc.} =
 #TODO encodeInto
 
 proc addEncodingModule*(ctx: JSContext) =
-  ctx.registerType(TextDecoder)
-  ctx.registerType(TextEncoder)
+  ctx.registerType(JSTextDecoder, name = "TextDecoder")
+  ctx.registerType(JSTextEncoder, name = "TextEncoder")
diff --git a/src/js/jstypes.nim b/src/js/jstypes.nim
index b8ef6c55..5336f067 100644
--- a/src/js/jstypes.nim
+++ b/src/js/jstypes.nim
@@ -30,3 +30,6 @@ type
     abuf*: JSArrayBuffer
     offset*: csize_t # offset into the buffer
     nmemb*: csize_t # number of members
+
+func high*(abuf: JSArrayBuffer): int =
+  return int(abuf.len) - 1