about summary refs log blame commit diff stats
path: root/src/html/chadombuilder.nim
blob: 3c5b2c12e4594c148954e5e739bbf387a9f98022 (plain) (tree)
1
2
3
4
5
6
7
8

                  
 
                 
               
                 
                 
                


                    
                       
 


                       

                             

                                       
                              
                                      
 
    
                                  
                                    

                                      


                                                                              
 
                                                       

                                  
                       








                                   




                                          
                                              
                                                                       


                                          


                                                    
                                                                      

                                        
                                                                         
                                        
 
                                                              



                                                     
                         
                                                                


                    

                                                              
                                             

                                   
                                    
                                    





                                 
                           
                                
                                                         
 
                                                                        


                                                             
                                                               
                       
                                       

                                                               
                                  
                                



                                    

                                

                                                         
       
           
                          
 
                                                                         

                                            
                                                                            
                                  
 
                                                                    

                                  
                                                                        

                                  


                                                          

                                                                                







                                                                      
                          
                                  


                                           

                                                                            

                
                                                                    
                                             
 
                                                                   
                             

                                                                   
                                                                  


                                                     
                                                                       


                                     

                    
                                                
                                  

                                    
       

                                                      
 
                                                      

                                        
                                                                       




                                         
 
                                                                

                                  



                            
                                                                        

                                                 

                                                  
                                     
                                          
                                        



                                                  
                                                               
                                
                                    
                          



                                                                               

                                                                             
                                     






                                    
                     

                           

   
                                                                            
                                                                 
                                       
                                        
                                                                 
                                 














                                                                       
                                          

                                 
                                                  
                                 
                                                      

                                                   
                                             


                                                                         

                  

                       

                                                                            

                                                                               
                                   

                     
                                          
   
                                           

                
                                                                        






                                             




                                                    





                                                    











                                                                        

            































                                                                            
                                           

                          


                        

                                           
                    
 
                                                                        
                                   

                 
                                        
                                                
                             
                                        


                               
                                                                     
                                                                             





                                                                         






                                                                             
import std/deques
import std/options

import html/catom
import html/dom
import html/enums
import js/jserror
import js/fromjs
import js/javascript
import types/url

import chagashi/charset

import chame/htmlparser
import chame/tags

export htmlparser.ParseResult

# DOMBuilder implementation for Chawan.

type CharsetConfidence* = enum
  ccTentative, ccCertain, ccIrrelevant

type
  HTML5ParserWrapper* = ref object
    parser: HTML5Parser[Node, CAtom]
    builder*: ChaDOMBuilder
    opts: HTML5ParserOpts[Node, CAtom]
    # hack so we don't have to worry about leaks or the GC deallocating parser
    refs: seq[Document]
    stoppedFromScript: bool

  ChaDOMBuilder = ref object of DOMBuilder[Node, CAtom]
    charset*: Charset
    confidence*: CharsetConfidence
    document*: Document
    factory: CAtomFactory
    poppedScript: HTMLScriptElement

type
  DOMBuilderImpl = ChaDOMBuilder
  HandleImpl = Node
  AtomImpl = CAtom

include chame/htmlparseriface

type DOMParser = ref object # JS interface

jsDestructor(DOMParser)

#TODO this is disgusting and should be removed
proc setActiveParser(document: Document; wrapper: HTML5ParserWrapper) =
  document.parser = cast[pointer](wrapper)
  wrapper.refs.add(document)

proc getDocumentImpl(builder: ChaDOMBuilder): Node =
  return builder.document

proc atomToTagTypeImpl(builder: ChaDOMBuilder; atom: CAtom): TagType =
  return builder.factory.toTagType(atom)

proc tagTypeToAtomImpl(builder: ChaDOMBuilder; tagType: TagType): CAtom =
  return builder.factory.toAtom(tagType)

proc strToAtomImpl(builder: ChaDOMBuilder; s: string): CAtom =
  return builder.factory.toAtom(s)

proc finish(builder: ChaDOMBuilder) =
  while builder.document.scriptsToExecOnLoad.len > 0:
    #TODO spin event loop
    let script = builder.document.scriptsToExecOnLoad.popFirst()
    script.execute()
  #TODO events

proc restart*(wrapper: HTML5ParserWrapper, charset: Charset) =
  let builder = wrapper.builder
  let document = newDocument(builder.factory)
  document.setActiveParser(wrapper)
  wrapper.refs.add(document)
  document.contentType = "text/html"
  let oldDocument = builder.document
  document.url = oldDocument.url
  let window = oldDocument.window
  if window != nil:
    document.window = window
    window.document = document
  builder.document = document
  builder.charset = charset
  assert document.factory != nil
  wrapper.parser = initHTML5Parser(builder, wrapper.opts)

proc setQuirksModeImpl(builder: ChaDOMBuilder; quirksMode: QuirksMode) =
  if not builder.document.parser_cannot_change_the_mode_flag:
    builder.document.mode = quirksMode

proc setEncodingImpl(builder: ChaDOMBuilder; encoding: string):
    SetEncodingResult =
  if builder.confidence != ccTentative:
    return SET_ENCODING_CONTINUE
  if builder.charset in {CHARSET_UTF_16_LE, CHARSET_UTF_16_BE}:
    builder.confidence = ccCertain
    return SET_ENCODING_CONTINUE
  let charset = getCharset(encoding)
  if charset == CHARSET_UNKNOWN:
    return SET_ENCODING_CONTINUE
  builder.confidence = ccCertain
  if charset == builder.charset:
    return SET_ENCODING_CONTINUE
  builder.charset = if charset == CHARSET_X_USER_DEFINED:
    CHARSET_WINDOWS_1252
  else:
    charset
  return SET_ENCODING_STOP

proc getTemplateContentImpl(builder: ChaDOMBuilder; handle: Node): Node =
  return HTMLTemplateElement(handle).content

proc getParentNodeImpl(builder: ChaDOMBuilder; handle: Node): Option[Node] =
  return option(handle.parentNode)

proc getLocalNameImpl(builder: ChaDOMBuilder; handle: Node): CAtom =
  return Element(handle).localName

proc getNamespaceImpl(builder: ChaDOMBuilder; handle: Node): Namespace =
  return Element(handle).namespace

proc createHTMLElementImpl(builder: ChaDOMBuilder): Node =
  return builder.document.newHTMLElement(TAG_HTML)

proc createElementForTokenImpl(builder: ChaDOMBuilder; localName: CAtom;
    namespace: Namespace; intendedParent: Node; htmlAttrs: Table[CAtom, string];
    xmlAttrs: seq[ParsedAttr[CAtom]]): Node =
  let document = builder.document
  let element = document.newHTMLElement(localName, namespace)
  for k, v in htmlAttrs:
    element.attr(k, v)
  for attr in xmlAttrs:
    element.attrns(attr.name, attr.prefix, attr.namespace, attr.value)
  if element.tagType in ResettableElements:
    element.resetElement()
  if element of HTMLScriptElement:
    let script = HTMLScriptElement(element)
    script.parserDocument = document
    script.forceAsync = false
    # Note: per standard, we could set already started to true here when we
    # are parsing from document.write, but that sounds like a horrible idea.
  return element

proc createCommentImpl(builder: ChaDOMBuilder; text: string): Node =
  return builder.document.createComment(text)

proc createDocumentTypeImpl(builder: ChaDOMBuilder; name, publicId,
    systemId: string): Node =
  return builder.document.newDocumentType(name, publicId, systemId)

proc insertBeforeImpl(builder: ChaDOMBuilder; parent, child: Node;
    before: Option[Node]) =
  discard parent.insertBefore(child, before.get(nil))

proc insertTextImpl(builder: ChaDOMBuilder; parent: Node; text: string;
    before: Option[Node]) =
  let prevSibling = if before.isSome:
    before.get.previousSibling
  else:
    parent.lastChild
  if prevSibling != nil and prevSibling of Text:
    Text(prevSibling).data &= text
    if parent of Element:
      Element(parent).invalid = true
  else:
    let text = builder.document.createTextNode(text)
    discard parent.insertBefore(text, before.get(nil))

proc removeImpl(builder: ChaDOMBuilder; child: Node) =
  child.remove(suppressObservers = true)

proc moveChildrenImpl(builder: ChaDOMBuilder; fromNode, toNode: Node) =
  var tomove = fromNode.childList
  for node in tomove:
    node.remove(suppressObservers = true)
  for child in tomove:
    toNode.insert(child, nil)

proc addAttrsIfMissingImpl(builder: ChaDOMBuilder; handle: Node;
    attrs: Table[CAtom, string]) =
  let element = Element(handle)
  for k, v in attrs:
    if not element.attrb(k):
      element.attr(k, v)

proc setScriptAlreadyStartedImpl(builder: ChaDOMBuilder; script: Node) =
  HTMLScriptElement(script).alreadyStarted = true

proc associateWithFormImpl(builder: ChaDOMBuilder;
    element, form, intendedParent: Node) =
  if form.inSameTree(intendedParent):
    #TODO remove following test eventually
    if element of FormAssociatedElement:
      let element = FormAssociatedElement(element)
      element.setForm(HTMLFormElement(form))
      element.parserInserted = true

proc elementPoppedImpl(builder: ChaDOMBuilder; element: Node) =
  let element = Element(element)
  if element of HTMLTextAreaElement:
    element.resetElement()
  elif element of HTMLScriptElement:
    assert builder.poppedScript == nil or not builder.document.scriptingEnabled
    builder.poppedScript = HTMLScriptElement(element)

proc newChaDOMBuilder(url: URL; window: Window; factory: CAtomFactory;
    confidence: CharsetConfidence; charset = DefaultCharset): ChaDOMBuilder =
  let document = newDocument(factory)
  document.contentType = "text/html"
  document.url = url
  if window != nil:
    document.window = window
    window.document = document
  return ChaDOMBuilder(
    document: document,
    factory: factory,
    confidence: confidence,
    charset: charset
  )

# https://html.spec.whatwg.org/multipage/parsing.html#parsing-html-fragments
proc parseHTMLFragment*(element: Element; s: string): seq[Node] =
  let url = parseURL("about:blank").get
  let factory = element.document.factory
  let builder = newChaDOMBuilder(url, nil, factory, ccIrrelevant)
  let document = builder.document
  document.mode = element.document.mode
  let state = case element.tagType
  of TAG_TITLE, TAG_TEXTAREA: RCDATA
  of TAG_STYLE, TAG_XMP, TAG_IFRAME, TAG_NOEMBED, TAG_NOFRAMES: RAWTEXT
  of TAG_SCRIPT: SCRIPT_DATA
  of TAG_NOSCRIPT:
    if element.document != nil and element.document.scriptingEnabled:
      RAWTEXT
    else:
      DATA
  of TAG_PLAINTEXT:
    PLAINTEXT
  else: DATA
  let root = document.newHTMLElement(TAG_HTML)
  document.append(root)
  let opts = HTML5ParserOpts[Node, CAtom](
    isIframeSrcdoc: false, #TODO?
    scripting: false,
    ctx: some((Node(element), element.localName)),
    initialTokenizerState: state,
    openElementsInit: @[(Node(root), root.localName)],
    pushInTemplate: element.tagType == TAG_TEMPLATE
  )
  var parser = initHTML5Parser(builder, opts)
  let res = parser.parseChunk(s.toOpenArray(0, s.high))
  # scripting is false and confidence is certain -> this must be continue
  assert res == PRES_CONTINUE
  parser.finish()
  builder.finish()
  return root.childList

proc newHTML5ParserWrapper*(window: Window; url: URL; factory: CAtomFactory;
    confidence: CharsetConfidence; charset: Charset): HTML5ParserWrapper =
  let opts = HTML5ParserOpts[Node, CAtom](scripting: window.settings.scripting)
  let builder = newChaDOMBuilder(url, window, factory, confidence, charset)
  let wrapper = HTML5ParserWrapper(
    builder: builder,
    opts: opts,
    parser: initHTML5Parser(builder, opts)
  )
  builder.document.setActiveParser(wrapper)
  return wrapper

proc parseBuffer*(wrapper: HTML5ParserWrapper; buffer: openArray[char]):
    ParseResult =
  let builder = wrapper.builder
  let document = builder.document
  var res = wrapper.parser.parseChunk(buffer)
  # set insertion point for when it's needed
  var ip = wrapper.parser.getInsertionPoint()
  while res == PRES_SCRIPT:
    #TODO microtask
    let script = builder.poppedScript
    builder.poppedScript = nil
    document.writeBuffers.add(DocumentWriteBuffer())
    script.prepare()
    while document.parserBlockingScript != nil:
      let script = document.parserBlockingScript
      document.parserBlockingScript = nil
      #TODO style sheet
      script.execute()
      assert document.parserBlockingScript != script
    if wrapper.stoppedFromScript:
      # document.write inserted a meta charset tag
      break
    assert document.writeBuffers[^1].toOA().len == 0
    discard document.writeBuffers.pop()
    assert document.writeBuffers.len == 0
    if ip == buffer.len:
      # script was at the end of the buffer; nothing to parse
      break
    # parse rest of input buffer
    res = wrapper.parser.parseChunk(buffer.toOpenArray(ip, buffer.high))
    ip += wrapper.parser.getInsertionPoint() # move insertion point
  return res

# Called from dom whenever document.write is executed.
# We consume everything pushed into the top buffer.
proc CDB_parseDocumentWriteChunk(wrapper: pointer) {.exportc.} =
  let wrapper = cast[HTML5ParserWrapper](wrapper)
  let builder = wrapper.builder
  let document = builder.document
  let buffer = document.writeBuffers[^1]
  var res = wrapper.parser.parseChunk(buffer.toOA())
  if res == PRES_SCRIPT:
    document.writeBuffers.add(DocumentWriteBuffer())
    while true:
      buffer.i += wrapper.parser.getInsertionPoint()
      #TODO microtask
      let script = builder.poppedScript
      builder.poppedScript = nil
      script.prepare()
      while document.parserBlockingScript != nil:
        let script = document.parserBlockingScript
        document.parserBlockingScript = nil
        #TODO style sheet
        script.execute()
        assert document.parserBlockingScript != script
      res = wrapper.parser.parseChunk(buffer.toOA())
      if res != PRES_SCRIPT:
        break
    assert document.writeBuffers[^1].i == document.writeBuffers[^1].data.len
    discard document.writeBuffers.pop()
  assert builder.poppedScript == nil
  buffer.i = buffer.data.len
  if res == PRES_STOP:
    wrapper.stoppedFromScript = true

proc finish*(wrapper: HTML5ParserWrapper) =
  wrapper.parser.finish()
  wrapper.builder.finish()
  for r in wrapper.refs:
    r.parser = nil
  wrapper.refs.setLen(0)

proc newDOMParser(): DOMParser {.jsctor.} =
  return DOMParser()

proc parseFromString(ctx: JSContext; parser: DOMParser; str, t: string):
    JSResult[Document] {.jsfunc.} =
  case t
  of "text/html":
    let global = JS_GetGlobalObject(ctx)
    let window = fromJS[Window](ctx, global).get
    JS_FreeValue(ctx, global)
    let url = if window.document != nil:
      window.document.url
    else:
      newURL("about:blank").get
    #TODO this is probably broken in client (or at least sub-optimal)
    let builder = newChaDOMBuilder(url, window, window.factory, ccIrrelevant)
    var parser = initHTML5Parser(builder, HTML5ParserOpts[Node, CAtom]())
    let res = parser.parseChunk(str)
    assert res == PRES_CONTINUE
    parser.finish()
    builder.finish()
    return ok(builder.document)
  of "text/xml", "application/xml", "application/xhtml+xml", "image/svg+xml":
    return err(newInternalError("XML parsing is not supported yet"))
  else:
    return err(newTypeError("Invalid mime type"))

proc addHTMLModule*(ctx: JSContext) =
  ctx.registerType(DOMParser)