diff options
author | bptato <nincsnevem662@gmail.com> | 2024-02-22 20:14:08 +0100 |
---|---|---|
committer | bptato <nincsnevem662@gmail.com> | 2024-02-22 20:14:19 +0100 |
commit | 78ffc938fa7e4baad0a55625026b765d215be1aa (patch) | |
tree | 857db2e963efb13933d1e7954e9d4dc657b6f11d /src/js | |
parent | def15ede4fbd686b0ee9b193f41b2a47190aa43a (diff) | |
download | chawan-78ffc938fa7e4baad0a55625026b765d215be1aa.tar.gz |
Replace Chakasu with Chagashi
The API is horrid :( but at least it copies less. TODO: think of a better API.
Diffstat (limited to 'src/js')
-rw-r--r-- | src/js/encoding.nim | 197 | ||||
-rw-r--r-- | src/js/jstypes.nim | 3 |
2 files changed, 147 insertions, 53 deletions
```diff
diff --git a/src/js/encoding.nim b/src/js/encoding.nim
index 0e6643fb..b2ef1d1b 100644
--- a/src/js/encoding.nim
+++ b/src/js/encoding.nim
@@ -1,98 +1,189 @@
 import std/streams
 
+import bindings/quickjs
 import js/error
 import js/javascript
 import js/jstypes
 
-import chakasu/charset
-import chakasu/decoderstream
-import chakasu/encoderstream
+import chagashi/charset
+import chagashi/decoder
+import chagashi/decodercore
+import chagashi/validator
+import chagashi/validatorcore
 
 type
-  TextEncoder = ref object
+  JSTextEncoder = ref object
 
-  TextDecoder = ref object
+  JSTextDecoder = ref object
     encoding: Charset
-    errorMode: DecoderErrorMode
     ignoreBOM {.jsget.}: bool
+    fatal {.jsget.}: bool
     doNotFlush: bool
     bomSeen: bool
-    decoder: DecoderStream
-    encoder: EncoderStream # to return the string to JS
-    istream: StringStream
+    td: TextDecoder
+    tv: ref TextValidatorUTF8
+    validateBuf: seq[uint8]
 
-jsDestructor(TextDecoder)
-jsDestructor(TextEncoder)
+jsDestructor(JSTextDecoder)
+jsDestructor(JSTextEncoder)
 
 type TextDecoderOptions = object of JSDict
   fatal: bool
   ignoreBOM: bool
 
-func newTextDecoder(label = "utf-8", options = TextDecoderOptions()):
-    JSResult[TextDecoder] {.jsctor.} =
-  let errorMode = if options.fatal:
-    DECODER_ERROR_MODE_FATAL
-  else:
-    DECODER_ERROR_MODE_REPLACEMENT
+func newJSTextDecoder(label = "utf-8", options = TextDecoderOptions()):
+    JSResult[JSTextDecoder] {.jsctor.} =
   let encoding = getCharset(label)
   if encoding in {CHARSET_UNKNOWN, CHARSET_REPLACEMENT}:
     return err(newRangeError("Invalid encoding label"))
-  return ok(TextDecoder(
-    errorMode: errorMode,
+  return ok(JSTextDecoder(
     ignoreBOM: options.ignoreBOM,
+    fatal: options.fatal,
+    td: if encoding != CHARSET_UTF_8: newTextDecoder(encoding) else: nil,
+    tv: if encoding == CHARSET_UTF_8: (ref TextValidatorUTF8)() else: nil,
     encoding: encoding
   ))
 
+type Growbuf = object
+  p: ptr UncheckedArray[uint8]
+  cap: int
+  len: int
+
+{.warning[Deprecated]: off.}:
+  proc `=destroy`(growbuf: var Growbuf) =
+    if growbuf.p != nil:
+      dealloc(growbuf.p)
+      growbuf.p = nil
+
+const BufferSize = 128
+proc grow(buf: var Growbuf) =
+  if buf.cap == 0:
+    buf.cap = BufferSize
+  else:
+    buf.cap *= 2
+  buf.p = cast[ptr UncheckedArray[uint8]](buf.p.realloc(buf.cap))
+
+proc write(buf: var Growbuf, s: openArray[uint8]) =
+  if buf.len + s.len > buf.cap:
+    buf.grow()
+  if s.len > 0:
+    copyMem(addr buf.p[buf.len], unsafeAddr s[0], s.len)
+  buf.len += s.len
+
+proc write(buf: var Growbuf, s: string) =
+  if buf.len + s.len > buf.cap:
+    buf.grow()
+  if s.len > 0:
+    copyMem(addr buf.p[buf.len], unsafeAddr s[0], s.len)
+  buf.len += s.len
+
+proc decode0(this: JSTextDecoder, ctx: JSContext, input: JSArrayBufferView,
+    stream: bool): JSResult[JSValue] =
+  var oq = Growbuf(
+    p: cast[ptr UncheckedArray[uint8]](alloc(BufferSize)),
+    len: 0,
+    cap: BufferSize
+  )
+  var i = 0
+  let H = int(input.abuf.len) - 1
+  template handle_error =
+    if this.fatal:
+      return errTypeError("Failed to decode string")
+    oq.write("\uFFFD")
+    i = this.td.i
+  while true:
+    case this.td.decode(input.abuf.p.toOpenArray(i, H),
+      oq.p.toOpenArray(0, oq.cap - 1), oq.len)
+    of tdrDone:
+      if not stream:
+        case this.td.finish()
+        of tdfrDone: discard
+        of tdfrError: handle_error
+      break
+    of tdrError:
+      handle_error
+    of tdrReqOutput:
+      oq.grow()
+  return ok(JS_NewStringLen(ctx, cast[cstring](oq.p), csize_t(oq.len)))
+
+proc validate0(this: JSTextDecoder, ctx: JSContext, input: JSArrayBufferView,
+    stream: bool): JSResult[JSValue] =
+  # assume input is valid; do not allocate yet
+  var oq = Growbuf(p: nil, len: 0, cap: 0)
+  var i = 0
+  let H = int(input.abuf.len) - 1
+  var n = 0
+  template handle_error =
+    if this.fatal:
+      return errTypeError("Failed to decode string")
+    # write from previous error (or beginning) to the last valid char
+    oq.write(input.abuf.p.toOpenArray(i, n))
+    oq.write("\uFFFD")
+    this.validateBuf.setLen(0)
+    i = this.tv.i
+  while true:
+    case this.tv[].validate(input.abuf.p.toOpenArray(i, H), n)
+    of tvrDone:
+      break
+    of tvrError:
+      handle_error
+  if not stream:
+    case this.tv[].finish()
+    of tvrDone: discard
+    of tvrError: handle_error
+  if this.validateBuf.len > 0 and n > -1:
+    oq.write(this.validateBuf)
+    oq.write(input.abuf.p.toOpenArray(i, n))
+    this.validateBuf.setLen(0)
+  this.validateBuf.add(input.abuf.p.toOpenArray(n + 1, input.abuf.high))
+  if oq.len > 0:
+    assert oq.p != nil
+    return ok(JS_NewStringLen(ctx, cast[cstring](oq.p), csize_t(oq.len)))
+  assert oq.p == nil
+  return ok(JS_NewStringLen(ctx, cast[cstring](input.abuf.p), csize_t(n + 1)))
+
 type TextDecodeOptions = object of JSDict
   stream: bool
 
 #TODO AllowSharedBufferSource
-proc decode(this: TextDecoder, input = none(JSArrayBufferView),
-    options = TextDecodeOptions()): string {.jsfunc.} =
+proc decode(ctx: JSContext, this: JSTextDecoder, input = opt(JSArrayBufferView),
+    options = TextDecodeOptions()): JSResult[JSValue] {.jsfunc.} =
   if not this.doNotFlush:
-    if this.istream != nil:
-      this.istream.close()
-    if this.decoder != nil:
-      this.decoder.close()
-    if this.encoder != nil:
-      this.encoder.close()
-    this.istream = newStringStream()
-    this.decoder = newDecoderStream(this.istream, cs = this.encoding,
-      errormode = this.errorMode)
-    this.encoder = newEncoderStream(this.decoder, cs = CHARSET_UTF_8)
+    if this.td != nil:
+      this.td = newTextDecoder(this.encoding)
+    else:
+      assert this.tv != nil
+      this.tv = (ref TextValidatorUTF8)()
     this.bomSeen = false
   if this.doNotFlush != options.stream:
     this.doNotFlush = options.stream
-    this.decoder.setInhibitCheckEnd(options.stream)
   if input.isSome:
-    let input = input.get
-    let pos = this.istream.getPosition()
-    #TODO input offset?
-    this.istream.writeData(input.abuf.p, int(input.abuf.len))
-    this.istream.setPosition(pos)
-  #TODO this should return a JSString, so we do not needlessly re-encode
-  # the output. (Right now we do, implicitly through toJS.)
-  return this.encoder.readAll()
-
-func jencoding(this: TextDecoder): string {.jsfget: "encoding".} =
+    if this.td != nil:
+      return this.decode0(ctx, input.get, options.stream)
+    else:
+      assert this.encoding == CHARSET_UTF_8
+      # just validate
+      return this.validate0(ctx, input.get, options.stream)
+  return ok(JS_NewString(ctx, ""))
+
+func jencoding(this: JSTextDecoder): string {.jsfget: "encoding".} =
   return $this.encoding
 
-func fatal(this: TextDecoder): bool {.jsfget.} =
-  return this.errorMode == DECODER_ERROR_MODE_FATAL
-
-func newTextEncoder(): TextEncoder {.jsctor.} =
-  return TextEncoder()
+func newTextEncoder(): JSTextEncoder {.jsctor.} =
+  return JSTextEncoder()
 
-func jencoding(this: TextEncoder): string {.jsfget: "encoding".} =
+func jencoding(this: JSTextEncoder): string {.jsfget: "encoding".} =
   return "utf-8"
 
 proc dealloc_wrap(rt: JSRuntime, opaque, p: pointer) {.cdecl.} =
   dealloc(p)
 
-proc encode(this: TextEncoder, input = ""): JSUint8Array {.jsfunc.} =
-  # input is already UTF-8 here :P
+proc encode(this: JSTextEncoder, input = ""): JSUint8Array {.jsfunc.} =
+  # we have to validate input first :/
+  #TODO it is possible to do less copies here...
+  var input = input.toValidUTF8()
   let buf = cast[ptr UncheckedArray[uint8]](alloc(input.len))
-  copyMem(buf, unsafeAddr input[0], input.len)
+  copyMem(buf, addr input[0], input.len)
   let abuf = JSArrayBuffer(
     p: buf,
     len: csize_t(input.len),
@@ -107,5 +198,5 @@ proc encode(this: TextEncoder, input = ""): JSUint8Array {.jsfunc.} =
 #TODO encodeInto
 
 proc addEncodingModule*(ctx: JSContext) =
-  ctx.registerType(TextDecoder)
-  ctx.registerType(TextEncoder)
+  ctx.registerType(JSTextDecoder, name = "TextDecoder")
+  ctx.registerType(JSTextEncoder, name = "TextEncoder")
diff --git a/src/js/jstypes.nim b/src/js/jstypes.nim
index b8ef6c55..5336f067 100644
--- a/src/js/jstypes.nim
+++ b/src/js/jstypes.nim
@@ -30,3 +30,6 @@ type
     abuf*: JSArrayBuffer
     offset*: csize_t # offset into the buffer
     nmemb*: csize_t # number of members
+
+func high*(abuf: JSArrayBuffer): int =
+  return int(abuf.len) - 1
```