import std/streams

import bindings/quickjs
import js/error
import js/javascript
import js/jstypes

import chagashi/charset
import chagashi/decoder
import chagashi/decodercore
import chagashi/validator
import chagashi/validatorcore

type
  # JS-visible TextEncoder object; carries no state (output is always UTF-8).
  JSTextEncoder = ref object

  # JS-visible TextDecoder object.
  JSTextDecoder = ref object
    encoding: Charset
    ignoreBOM {.jsget.}: bool
    fatal {.jsget.}: bool
    # true while the previous decode() call was made with {stream: true};
    # the decoder/validator state is then kept for the next call
    doNotFlush: bool
    bomSeen: bool
    # exactly one of td/tv is non-nil: tv for UTF-8, td for everything else
    td: TextDecoder
    tv: ref TextValidatorUTF8
    # trailing input bytes carried over from one validate0 call to the next
    validateBuf: seq[uint8]

jsDestructor(JSTextDecoder)
jsDestructor(JSTextEncoder)

# Options dictionary accepted by the TextDecoder constructor.
type TextDecoderOptions = object of JSDict
  fatal: bool
  ignoreBOM: bool

# Constructor for TextDecoder.
# Returns a RangeError if `label` resolves to CHARSET_UNKNOWN or
# CHARSET_REPLACEMENT. UTF-8 gets a validator (tv); every other charset gets
# a decoder (td).
func newJSTextDecoder(label = "utf-8", options = TextDecoderOptions()):
    JSResult[JSTextDecoder] {.jsctor.} =
  let encoding = getCharset(label)
  if encoding in {CHARSET_UNKNOWN, CHARSET_REPLACEMENT}:
    return err(newRangeError("Invalid encoding label"))
  return ok(JSTextDecoder(
    ignoreBOM: options.ignoreBOM,
    fatal: options.fatal,
    td: if encoding != CHARSET_UTF_8: newTextDecoder(encoding) else: nil,
    tv: if encoding == CHARSET_UTF_8: (ref TextValidatorUTF8)() else: nil,
    encoding: encoding
  ))

# Manually allocated, growable byte buffer used to assemble decoded output.
# `p` may be nil while `cap` is 0; the memory is released by `=destroy`.
type Growbuf = object
  p: ptr UncheckedArray[uint8]
  cap: int
  len: int

{.warning[Deprecated]: off.}:
  # Frees the backing allocation, if there is one.
  proc `=destroy`(growbuf: var Growbuf) =
    if growbuf.p != nil:
      dealloc(growbuf.p)
      growbuf.p = nil

const BufferSize = 128

# Enlarges the buffer by one step: an empty buffer gets BufferSize bytes,
# otherwise the capacity doubles.
proc grow(buf: var Growbuf) =
  if buf.cap == 0:
    buf.cap = BufferSize
  else:
    buf.cap *= 2
  buf.p = cast[ptr UncheckedArray[uint8]](buf.p.realloc(buf.cap))

# Grows `buf` until `extra` more bytes fit after the current contents.
# A single grow() step is not enough when `extra` exceeds the step size, so
# this loops; writing without doing so would overrun the allocation.
proc ensureRoom(buf: var Growbuf; extra: int) =
  while buf.len + extra > buf.cap:
    buf.grow()

# Appends the bytes of `s` to `buf`, growing it as needed.
proc write(buf: var Growbuf; s: openArray[uint8]) =
  buf.ensureRoom(s.len)
  if s.len > 0:
    copyMem(addr buf.p[buf.len], unsafeAddr s[0], s.len)
  buf.len += s.len

# Appends the bytes of the string `s` to `buf`, growing it as needed.
proc write(buf: var Growbuf; s: string) =
  buf.ensureRoom(s.len)
  if s.len > 0:
    copyMem(addr buf.p[buf.len], unsafeAddr s[0], s.len)
  buf.len += s.len

# Decodes `input` with this.td (the non-UTF-8 path) into a new JS string.
# When `stream` is false, the decoder is also finished after the input has
# been consumed. On a decoding error: TypeError if this.fatal is set,
# otherwise U+FFFD is emitted and decoding continues.
proc decode0(this: JSTextDecoder; ctx: JSContext; input: JSArrayBufferView;
    stream: bool): JSResult[JSValue] =
  var oq = Growbuf(
    p: cast[ptr UncheckedArray[uint8]](alloc(BufferSize)),
    len: 0,
    cap: BufferSize
  )
  var i = 0
  let H = int(input.abuf.len) - 1
  template handle_error =
    if this.fatal:
      return errTypeError("Failed to decode string")
    oq.write("\uFFFD")
    # resume from the position reported by the decoder
    i = this.td.i
  while true:
    case this.td.decode(input.abuf.p.toOpenArray(i, H),
        oq.p.toOpenArray(0, oq.cap - 1), oq.len)
    of tdrDone:
      if not stream:
        case this.td.finish()
        of tdfrDone: discard
        of tdfrError: handle_error
      break
    of tdrError:
      handle_error
    of tdrReqOutput:
      # enlarge the output buffer, then call decode again
      oq.grow()
  return ok(JS_NewStringLen(ctx, cast[cstring](oq.p), csize_t(oq.len)))

# UTF-8 path: validates `input` with this.tv and returns it as a JS string.
# If nothing had to be written to a scratch buffer, the string is created
# directly from the input bytes. On invalid input: TypeError if this.fatal
# is set, otherwise U+FFFD is emitted.
proc validate0(this: JSTextDecoder; ctx: JSContext; input: JSArrayBufferView;
    stream: bool): JSResult[JSValue] =
  # assume input is valid; do not allocate yet
  var oq = Growbuf(p: nil, len: 0, cap: 0)
  var i = 0
  let H = int(input.abuf.len) - 1
  var n = 0
  template handle_error =
    if this.fatal:
      return errTypeError("Failed to decode string")
    # write from previous error (or beginning) to the last valid char
    oq.write(input.abuf.p.toOpenArray(i, n))
    oq.write("\uFFFD")
    this.validateBuf.setLen(0)
    # resume from the position reported by the validator
    i = this.tv.i
  while true:
    case this.tv[].validate(input.abuf.p.toOpenArray(i, H), n)
    of tvrDone:
      break
    of tvrError:
      handle_error
  if not stream:
    case this.tv[].finish()
    of tvrDone: discard
    of tvrError: handle_error
  # NOTE(review): the input after the last replaced error is only copied to
  # oq when validateBuf is non-empty; confirm against the validator's
  # `n`/`i` semantics that no valid tail is dropped otherwise.
  if this.validateBuf.len > 0 and n > -1:
    # bytes held back by an earlier call come first, then this chunk
    oq.write(this.validateBuf)
    oq.write(input.abuf.p.toOpenArray(i, n))
    this.validateBuf.setLen(0)
  # keep whatever follows index n for the next call
  this.validateBuf.add(input.abuf.p.toOpenArray(n + 1, input.abuf.high))
  if oq.len > 0:
    assert oq.p != nil
    return ok(JS_NewStringLen(ctx, cast[cstring](oq.p), csize_t(oq.len)))
  assert oq.p == nil
  # nothing was copied: build the string straight from the input buffer
  return ok(JS_NewStringLen(ctx, cast[cstring](input.abuf.p), csize_t(n + 1)))

# Options dictionary accepted by TextDecoder.decode().
type TextDecodeOptions = object of JSDict
  stream: bool

#TODO AllowSharedBufferSource
# TextDecoder.decode(): unless the previous call was a streaming one, the
# decoder/validator state is recreated first. Returns the empty string when
# no input is given.
proc decode(ctx: JSContext; this: JSTextDecoder;
    input = none(JSArrayBufferView); options = TextDecodeOptions()):
    JSResult[JSValue] {.jsfunc.} =
  if not this.doNotFlush:
    if this.td != nil:
      this.td = newTextDecoder(this.encoding)
    else:
      assert this.tv != nil
      this.tv = (ref TextValidatorUTF8)()
    this.bomSeen = false
  this.doNotFlush = options.stream
  if input.isSome:
    if this.td != nil:
      return this.decode0(ctx, input.get, options.stream)
    else:
      assert this.encoding == CHARSET_UTF_8
      # just validate
      return this.validate0(ctx, input.get, options.stream)
  return ok(JS_NewString(ctx, ""))

# Getter for TextDecoder.encoding: the charset converted to a string.
func jencoding(this: JSTextDecoder): string {.jsfget: "encoding".} =
  return $this.encoding

# Constructor for TextEncoder.
func newTextEncoder(): JSTextEncoder {.jsctor.} =
  return JSTextEncoder()

# Getter for TextEncoder.encoding: always "utf-8".
func jencoding(this: JSTextEncoder): string {.jsfget: "encoding".} =
  return "utf-8"

# Deallocation callback stored in the JSArrayBuffer created by encode().
proc dealloc_wrap(rt: JSRuntime; opaque, p: pointer) {.cdecl.} =
  dealloc(p)

# TextEncoder.encode(): returns the (validated) UTF-8 bytes of `input` as a
# Uint8Array backed by a freshly allocated buffer.
proc encode(this: JSTextEncoder; input = ""): JSUint8Array {.jsfunc.} =
  # we have to validate input first :/
  #TODO it is possible to do less copies here...
  var input = input.toValidUTF8()
  let buf = cast[ptr UncheckedArray[uint8]](alloc(input.len))
  if input.len > 0:
    # input[0] is out of bounds for an empty string (the default argument),
    # so only copy when there is at least one byte
    copyMem(buf, addr input[0], input.len)
  let abuf = JSArrayBuffer(
    p: buf,
    len: csize_t(input.len),
    dealloc: dealloc_wrap
  )
  return JSUint8Array(
    abuf: abuf,
    offset: 0,
    nmemb: csize_t(input.len)
  )

#TODO encodeInto

# Registers the TextDecoder and TextEncoder classes on `ctx`.
proc addEncodingModule*(ctx: JSContext) =
  ctx.registerType(JSTextDecoder, name = "TextDecoder")
  ctx.registerType(JSTextEncoder, name = "TextEncoder")