about summary refs log blame commit diff stats
path: root/src/js/encoding.nim
blob: cfcf8f0719c59288163b34836927f044ff505f11 (plain) (tree)
1
2
3
4
5
6
7
8
9
10
                       

                    
                 
 




                             

    
                            
 
                            
                     
                             
                         

                    


                             
 

                           




                                          

                                                                       


                                                        
                          
                                 


                                                                          


                      


















                                                                 
                                                   





                                                        
                                         





                                                        
                                                                           



























                                                                       
                                                                             


































                                                                              



                                         


                                                                    
                         




                                             


                                       
                  








                                                                    

                       

                                                 
 
                                                                    

                
                                                                

            
                                                                       


                                                
                                                             
                                        













                                         

                                                       
import bindings/quickjs
import js/error
import js/javascript
import js/jstypes

import chagashi/charset
import chagashi/decoder
import chagashi/decodercore
import chagashi/validator
import chagashi/validatorcore

type
  # Object registered with the JS context as "TextEncoder"
  # (see addEncodingModule). It carries no state.
  JSTextEncoder = ref object

  # Object registered with the JS context as "TextDecoder"
  # (see addEncodingModule).
  JSTextDecoder = ref object
    # Charset resolved from the constructor's label; its string form is
    # what the `encoding` getter returns.
    encoding: Charset
    # Copied from the constructor options. Not read by any decoding code
    # visible in this file.
    ignoreBOM {.jsget.}: bool
    # Copied from the constructor options. When set, malformed input makes
    # decode0/validate0 return a TypeError instead of emitting U+FFFD.
    fatal {.jsget.}: bool
    # Value of `options.stream` from the most recent decode() call. While
    # it is false, the next decode() call starts from fresh decoder state.
    doNotFlush: bool
    # Reset to false whenever decode() resets the decoder state; not read
    # anywhere else in the visible code.
    bomSeen: bool
    # Decoder used for every encoding except UTF-8; nil for UTF-8.
    td: TextDecoder
    # Validator used for UTF-8 only; nil for every other encoding.
    tv: ref TextValidatorUTF8
    # Input bytes kept by validate0 between calls (the part of the input
    # after the last validated index).
    validateBuf: seq[uint8]

# jsDestructor is provided by js/javascript; presumably it generates the
# finalizer hook the JS bindings need for each wrapped ref type -- TODO
# confirm against that module.
jsDestructor(JSTextDecoder)
jsDestructor(JSTextEncoder)

# Options accepted by the TextDecoder constructor; newJSTextDecoder copies
# both fields into the new JSTextDecoder. Both default to false.
type TextDecoderOptions = object of JSDict
  fatal: bool
  ignoreBOM: bool

func newJSTextDecoder(label = "utf-8", options = TextDecoderOptions()):
    JSResult[JSTextDecoder] {.jsctor.} =
  # Constructor for TextDecoder.
  # `label` is resolved with getCharset; an unknown label or the
  # replacement charset yields a RangeError.
  # UTF-8 decoders get a validator (`tv`), every other encoding gets a real
  # decoder (`td`); the field that does not apply stays nil.
  let encoding = getCharset(label)
  if encoding == CHARSET_UNKNOWN or encoding == CHARSET_REPLACEMENT:
    return err(newRangeError("Invalid encoding label"))
  let decoder = JSTextDecoder(
    encoding: encoding,
    fatal: options.fatal,
    ignoreBOM: options.ignoreBOM
  )
  if encoding == CHARSET_UTF_8:
    decoder.tv = (ref TextValidatorUTF8)()
  else:
    decoder.td = newTextDecoder(encoding)
  return ok(decoder)

# Manually managed, growable byte buffer.
# `p` points to `cap` allocated bytes (nil while cap == 0); the first `len`
# bytes hold data. The memory is released by the `=destroy` hook below.
type Growbuf = object
  p: ptr UncheckedArray[uint8]
  cap: int
  len: int

{.warning[Deprecated]: off.}:
  # Frees the backing memory when a Growbuf goes out of scope.
  proc `=destroy`(growbuf: var Growbuf) =
    if growbuf.p != nil:
      dealloc(growbuf.p)
      growbuf.p = nil

# Capacity given to a buffer on its first allocation.
const BufferSize = 128

# Performs one growth step: an empty buffer gets BufferSize bytes, any other
# buffer doubles its capacity. `len` and the stored bytes are unchanged.
proc grow(buf: var Growbuf) =
  if buf.cap == 0:
    buf.cap = BufferSize
  else:
    buf.cap *= 2
  buf.p = cast[ptr UncheckedArray[uint8]](buf.p.realloc(buf.cap))

# Makes sure `extra` more bytes fit after the current contents.
# A single grow() step is not enough when `extra` exceeds the doubled
# capacity, so the capacity is doubled until the data fits, followed by
# one realloc.
proc ensureRoom(buf: var Growbuf; extra: int) =
  let needed = buf.len + extra
  if needed <= buf.cap:
    return
  var newCap = if buf.cap == 0: BufferSize else: buf.cap
  while newCap < needed:
    newCap *= 2
  buf.cap = newCap
  buf.p = cast[ptr UncheckedArray[uint8]](buf.p.realloc(newCap))

# Appends the bytes of `s` to the buffer, growing it as far as necessary.
# Writing an empty array changes nothing and never allocates.
proc write(buf: var Growbuf; s: openArray[uint8]) =
  if s.len == 0:
    return
  buf.ensureRoom(s.len)
  copyMem(addr buf.p[buf.len], unsafeAddr s[0], s.len)
  buf.len += s.len

# Appends the raw bytes of the string `s`; same contract as the byte
# overload, which does the actual work.
proc write(buf: var Growbuf; s: string) =
  buf.write(s.toOpenArrayByte(0, s.high))

proc decode0(this: JSTextDecoder; ctx: JSContext; input: JSArrayBufferView;
    stream: bool): JSResult[JSValue] =
  # Decode path for every encoding except UTF-8: runs `this.td` over the
  # bytes of `input` and returns the decoded text as a new JS string.
  # Malformed input yields a TypeError when `this.fatal` is set; otherwise
  # U+FFFD is emitted and decoding resumes at the decoder's position.
  # When `stream` is false the decoder is finished once the input is
  # consumed.
  var outBuf = Growbuf(
    cap: BufferSize,
    len: 0,
    p: cast[ptr UncheckedArray[uint8]](alloc(BufferSize))
  )
  let lastIdx = int(input.abuf.len) - 1
  var pos = 0
  template replaceOrFail =
    if this.fatal:
      return errTypeError("Failed to decode string")
    outBuf.write("\uFFFD")
    pos = this.td.i
  while true:
    let status = this.td.decode(input.abuf.p.toOpenArray(pos, lastIdx),
      outBuf.p.toOpenArray(0, outBuf.cap - 1), outBuf.len)
    case status
    of tdrReqOutput:
      # the decoder ran out of output space; enlarge the buffer and retry
      outBuf.grow()
    of tdrError:
      replaceOrFail
    of tdrDone:
      if not stream and this.td.finish() == tdfrError:
        replaceOrFail
      break
  return ok(JS_NewStringLen(ctx, cast[cstring](outBuf.p),
    csize_t(outBuf.len)))

proc validate0(this: JSTextDecoder; ctx: JSContext; input: JSArrayBufferView;
    stream: bool): JSResult[JSValue] =
  # UTF-8 path of decode(): validates the bytes of `input` with `this.tv`
  # instead of transcoding them. If nothing had to be copied into `oq`, the
  # JS string is created straight from the input buffer; otherwise it is
  # created from `oq`.
  # Malformed input yields a TypeError when `this.fatal` is set; otherwise
  # U+FFFD is written in its place.
  # assume input is valid; do not allocate yet
  var oq = Growbuf(p: nil, len: 0, cap: 0)
  # start of the input span that has not been copied to `oq` yet
  var i = 0
  # index of the last input byte (-1 for empty input)
  let H = int(input.abuf.len) - 1
  # Updated by validate(). The code below treats it as the index of the last
  # validated input byte (n + 1 bytes are returned on the no-copy path).
  # NOTE(review): whether n is relative to the slice passed to validate() or
  # to the whole input cannot be told from this file -- confirm in chagashi.
  var n = 0
  template handle_error =
    if this.fatal:
      return errTypeError("Failed to decode string")
    # write from previous error (or beginning) to the last valid char
    oq.write(input.abuf.p.toOpenArray(i, n))
    oq.write("\uFFFD")
    this.validateBuf.setLen(0)
    # continue after the offending bytes, at the validator's position
    i = this.tv.i
  while true:
    case this.tv[].validate(input.abuf.p.toOpenArray(i, H), n)
    of tvrDone:
      break
    of tvrError:
      handle_error
  if not stream:
    # last chunk: let the validator report a sequence cut off at the end
    case this.tv[].finish()
    of tvrDone: discard
    of tvrError: handle_error
  if this.validateBuf.len > 0 and n > -1:
    # bytes held back by an earlier call come first, then this input's
    # validated part
    oq.write(this.validateBuf)
    oq.write(input.abuf.p.toOpenArray(i, n))
    this.validateBuf.setLen(0)
  # NOTE(review): if an error was replaced above (oq.len > 0) while
  # validateBuf was empty, the validated bytes i..n following the last error
  # do not appear to be appended to oq -- confirm whether that tail is lost.
  # keep the bytes after the last validated index for the next call
  this.validateBuf.add(input.abuf.p.toOpenArray(n + 1, input.abuf.high))
  if oq.len > 0:
    assert oq.p != nil
    return ok(JS_NewStringLen(ctx, cast[cstring](oq.p), csize_t(oq.len)))
  # nothing was copied: build the string directly from the input buffer
  assert oq.p == nil
  return ok(JS_NewStringLen(ctx, cast[cstring](input.abuf.p), csize_t(n + 1)))

# Options accepted by TextDecoder.decode(). `stream` is forwarded to
# decode0/validate0 and stored in the decoder's doNotFlush field, so that
# the next decode() call keeps the decoder state when it was true.
type TextDecodeOptions = object of JSDict
  stream: bool

#TODO AllowSharedBufferSource
proc decode(ctx: JSContext; this: JSTextDecoder;
    input = none(JSArrayBufferView); options = TextDecodeOptions()):
    JSResult[JSValue] {.jsfunc.} =
  # TextDecoder.decode(): decodes `input` and returns the text as a JS
  # string; without input an empty string is returned.
  # Unless the previous call was made with `stream` set, the decoder (or
  # the UTF-8 validator) is replaced by a fresh one first.
  let streaming = options.stream
  if not this.doNotFlush:
    if this.td == nil:
      assert this.tv != nil
      this.tv = (ref TextValidatorUTF8)()
    else:
      this.td = newTextDecoder(this.encoding)
    this.bomSeen = false
  # remember for the next call whether state has to be kept
  this.doNotFlush = streaming
  if input.isNone:
    return ok(JS_NewString(ctx, ""))
  if this.td == nil:
    assert this.encoding == CHARSET_UTF_8
    # just validate
    return this.validate0(ctx, input.get, streaming)
  return this.decode0(ctx, input.get, streaming)

func jencoding(this: JSTextDecoder): string {.jsfget: "encoding".} =
  # Getter for TextDecoder.encoding: the string form of the charset chosen
  # by the constructor.
  $this.encoding

func newTextEncoder(): JSTextEncoder {.jsctor.} =
  # Constructor for TextEncoder; takes no arguments, the object is stateless.
  JSTextEncoder()

func jencoding(this: JSTextEncoder): string {.jsfget: "encoding".} =
  # Getter for TextEncoder.encoding; always "utf-8".
  "utf-8"

proc dealloc_wrap(rt: JSRuntime; opaque, p: pointer) {.cdecl.} =
  # cdecl callback stored in the `dealloc` field of the JSArrayBuffer built
  # by encode(). Frees `p` with Nim's dealloc; `rt` and `opaque` are unused.
  p.dealloc()

proc encode(this: JSTextEncoder; input = ""): JSUint8Array {.jsfunc.} =
  # TextEncoder.encode(): returns a Uint8Array holding the UTF-8 bytes of
  # `input`, passed through toValidUTF8 first.
  # The bytes are copied into memory obtained from alloc(); dealloc_wrap is
  # stored as the array buffer's deallocator so that memory can be released
  # with the matching dealloc().
  # we have to validate input first :/
  #TODO it is possible to do less copies here...
  var input = input.toValidUTF8()
  let buf = cast[ptr UncheckedArray[uint8]](alloc(input.len))
  if input.len > 0:
    # input[0] does not exist for an empty string (which is also the
    # default argument), so only take its address when there is data
    copyMem(buf, addr input[0], input.len)
  let abuf = JSArrayBuffer(
    p: buf,
    len: csize_t(input.len),
    dealloc: dealloc_wrap
  )
  return JSUint8Array(
    abuf: abuf,
    offset: 0,
    nmemb: csize_t(input.len)
  )

#TODO encodeInto

proc addEncodingModule*(ctx: JSContext) =
  # Registers the two types of this module with `ctx` under the names
  # "TextDecoder" and "TextEncoder".
  registerType(ctx, JSTextDecoder, name = "TextDecoder")
  registerType(ctx, JSTextEncoder, name = "TextEncoder")