about summary refs log blame commit diff stats
path: root/src/html/htmlparser.nim
blob: e13592014db3b406126a36aabe860a40b5c97924 (plain) (tree)
1
2
3
4
5
6
7
8
9
             
              

               
                

              
 
                
               
                
                         

                    
 
    

                                       

                             



                         
                              




                                     
                      





























                                                                               




























                                                                                   
 


                                                
       



































































































                                                                                                        
                                                                                                            



                                                                                      


                                       
                            
                          
 
                                                                                  

                                                                             
                                             


                                                




                                                              
                                                                                                 








                                                                        

                                                           
                                        

































































                                                 








                                                                      




















































































































































                                                                                                




                                                   




                                                                        
                               




                                                                        
                               






                                                                         
                               








                                                                                        
                                                       
















                                                     
                                       

                                               
 









                                                                                                                     

                    





















                                                                                                                           
                                                       
















                                                                             

                          









































                                                                                  
                 


























                                                                                                                                                        
                                                             

                                                                        
                                                  



                                                                          
                                                    











                                                                                   
                                                  


                                                                                        
                                                








































                                                                                        
           









                                                                 
           

               



                                                              
















































                                                                                                                                                    
                                                                                  






                                                                          
                                                                              











































                                                                                      
             




                                                                                               
                                                                                                     







                                                
                                                      
















                                                               
             


                                                        
                                                                    




















































                                                                                                                                        
                                         













                                                                                
                                                         

                                                                               
                                                                                                                                                    


























                                                                                                                           
                                                         














                                                                 
                                                          
                                                         









                                                                   
                                                             






                                                              
                                                               

                                                                                     
                                     
                                                               
                                                      

                                                   
                                                                     
                    
                                                                                                                             
                                                                                                                   

                                                             

                                       
                                              




                                                                       
                                                                                


                                                             
                        

                            


                                                                  
                                                       
                                                               
                                              







                                                      
                                                                                                            

                                                      
                                                    


























                                                               
             









                                                                                                                                      
             








                                                                                                               
             






                                                         
             





                                                               
             





                                                               
             






























                                                                                    
             













                                                                
                                                                















                                                                     
                                                                



                                                                
                                                                

















                                                                             
                                                                  










                                                                             
             

                                                                     
                                                                     









                                                                                    
                                                                    
             




                                                                 
                                                                









                                                                               
             

                                                              
                                                              



                                                                    
             

                                                                     
                                                                     






                                                                       
                                                                    


                             
                     

                      
                                 




                                                            
                              
                 
                         




                                                   
                                                       
                                               

                                               
                                                
                                           
                   


                                                       

































                                                                           
             

                                                                     
                                                                     



































































                                                                                                                             
             







                                                                                  
             













































                                                                              
























































                                                                                              
             
                                                                 






                                                                
                                                                 
























































                                                                                   
                                                          










                                                          
                                                                       



                                                                   
                                                                   











































































































































                                                                                                       
                                                                       









                                                                         
                                                                     


























































                                                                                             
                                                                  




                                                                   
                                                                  






                                                                       
                                                                  











                                                                                                       
                                                                








                                                                             
                                                                  

















































                                                                                    
                                                                    























































































                                                                                                            
 




                                                                 
                                                  


                                                






































                                                 
 






















                                                                                                      




                                                               
                                     
                                                                                            

                                                        
           




                                                                                                              
           















                                                                                                                    
         
                                           
 

                          
 
                        
 




                                                 














                                                                                              
import macros
import options
import sequtils
import streams
import strformat
import tables
import unicode

import css/sheet
import html/dom
import html/tags
import html/htmltokenizer
import js/javascript
import utils/twtstr

type
  DOMParser = ref object # JS interface

  # Stack of open elements; the last entry is what `currentNode` returns.
  OpenElements = seq[Element]

  # Tree-construction state shared by the procs in this module.
  HTML5Parser = object
    # `ctx` only exists when `fragment` is true; it is the element
    # substituted for a stack entry by resetInsertionMode and returned by
    # adjustedCurrentNode.
    case fragment: bool
    of true: ctx: Element
    else: discard
    openElements: OpenElements
    # Mode currently in effect.
    insertionMode: InsertionMode
    # Mode saved by the generic RAWTEXT/RCDATA algorithms before they switch
    # to TEXT.
    oldInsertionMode: InsertionMode
    # resetInsertionMode uses the last entry when it meets a template element.
    templateModes: seq[InsertionMode]
    # Consulted by resetInsertionMode when it reaches the html element.
    head: Element
    tokenizer: Tokenizer
    document: Document
    # When non-nil, createElement may associate new elements with this form.
    form: HTMLFormElement
    # Enables the table-related branch of appropriatePlaceForInsert.
    fosterParenting: bool
    scripting: bool
    activeFormatting: seq[(Element, Token)] # nil => marker
    framesetok: bool
    ignoreLF: bool
    pendingTableChars: string
    pendingTableCharsWhitespace: bool

  # Where a node goes: child of `inside`, placed before `before`.
  # `last_child_of` builds locations with `before` set to nil.
  AdjustedInsertionLocation = tuple[inside: Node, before: Node]

# 13.2.4.1
  InsertionMode = enum
    INITIAL, BEFORE_HTML, BEFORE_HEAD, IN_HEAD, IN_HEAD_NOSCRIPT, AFTER_HEAD,
    IN_BODY, TEXT, IN_TABLE, IN_TABLE_TEXT, IN_CAPTION, IN_COLUMN_GROUP,
    IN_TABLE_BODY, IN_ROW, IN_CELL, IN_SELECT, IN_SELECT_IN_TABLE, IN_TEMPLATE,
    AFTER_BODY, IN_FRAMESET, AFTER_FRAMESET, AFTER_AFTER_BODY,
    AFTER_AFTER_FRAMESET

proc resetInsertionMode(parser: var HTML5Parser) =
  ## Implements "reset the insertion mode appropriately" from the HTML
  ## standard: walks the stack of open elements from its last entry down to
  ## its first and sets `parser.insertionMode` according to the first entry
  ## that determines a mode. Falls back to IN_BODY once the first entry has
  ## been examined without a match.
  template switch_insertion_mode_and_return(mode: InsertionMode) =
    parser.insertionMode = mode
    return
  for i in countdown(parser.openElements.high, 0):
    var node = parser.openElements[i]
    let last = i == 0
    # Only the first stack entry is replaced by the context element
    # (fragment case); all other entries are examined as they are.
    if last and parser.fragment:
      node = parser.ctx
    if node.tagType == TAG_SELECT:
      if not last:
        # `last` is false, so `node` is openElements[i]; its ancestors are
        # the entries below it on the stack.
        for j in countdown(i - 1, 0):
          let ancestor = parser.openElements[j]
          case ancestor.tagType
          of TAG_TEMPLATE: break
          of TAG_TABLE: switch_insertion_mode_and_return IN_SELECT_IN_TABLE
          else: discard
      switch_insertion_mode_and_return IN_SELECT
    case node.tagType
    of TAG_TD, TAG_TH:
      if not last:
        switch_insertion_mode_and_return IN_CELL
    of TAG_TR: switch_insertion_mode_and_return IN_ROW
    of TAG_TBODY, TAG_THEAD, TAG_TFOOT:
      switch_insertion_mode_and_return IN_TABLE_BODY
    of TAG_CAPTION: switch_insertion_mode_and_return IN_CAPTION
    of TAG_COLGROUP: switch_insertion_mode_and_return IN_COLUMN_GROUP
    of TAG_TABLE: switch_insertion_mode_and_return IN_TABLE
    of TAG_TEMPLATE: switch_insertion_mode_and_return parser.templateModes[^1]
    of TAG_HEAD:
      if not last:
        switch_insertion_mode_and_return IN_HEAD
    of TAG_BODY: switch_insertion_mode_and_return IN_BODY
    of TAG_FRAMESET: switch_insertion_mode_and_return IN_FRAMESET
    of TAG_HTML:
      # No head element yet means the head still has to be parsed.
      if parser.head == nil:
        switch_insertion_mode_and_return BEFORE_HEAD
      else:
        switch_insertion_mode_and_return AFTER_HEAD
    else: discard
    if last:
      switch_insertion_mode_and_return IN_BODY

func currentNode(parser: HTML5Parser): Element =
  ## Returns the last entry of the stack of open elements. An empty stack is
  ## treated as a programming error and trips an assertion.
  if parser.openElements.len != 0:
    return parser.openElements[^1]
  assert false

func adjustedCurrentNode(parser: HTML5Parser): Element =
  ## Returns the adjusted current node: the context element when parsing a
  ## fragment and the stack of open elements holds exactly one element,
  ## otherwise the current node.
  if parser.fragment and parser.openElements.len == 1: parser.ctx
  else: parser.currentNode

# Marks a spot where a parse error would be reported; expands to a no-op.
template parse_error() =
  discard

func lastElementOfTag(parser: HTML5Parser, tagType: TagType): tuple[element: Element, pos: int] =
  ## Searches the stack of open elements from its last entry towards its
  ## first for an element of `tagType`. Returns the element together with
  ## its stack index, or `(nil, -1)` when there is none.
  var idx = parser.openElements.high
  while idx >= 0:
    let candidate = parser.openElements[idx]
    if candidate.tagType == tagType:
      return (candidate, idx)
    dec idx
  return (nil, -1)

# Builds the insertion location "inside `n`, after its last child": the
# `before` reference node is nil.
template last_child_of(n: Node): AdjustedInsertionLocation =
  (n, nil)

# 13.2.6.1
func appropriatePlaceForInsert(parser: HTML5Parser, target: Element): AdjustedInsertionLocation =
  ## Computes where a new node should be inserted when `target` is the
  ## intended parent. With foster parenting enabled and a table-related
  ## target, the node is redirected next to the last table (or into the last
  ## template's contents). A location inside a template element is moved to
  ## that template's contents.
  assert parser.openElements[0].tagType == TAG_HTML
  const FosterTargets = {TAG_TABLE, TAG_TBODY, TAG_TFOOT, TAG_THEAD, TAG_TR}
  if not parser.fosterParenting or target.tagType notin FosterTargets:
    result = last_child_of(target)
  else:
    let (tmpl, tmplPos) = parser.lastElementOfTag(TAG_TEMPLATE)
    let (table, tablePos) = parser.lastElementOfTag(TAG_TABLE)
    # A template opened after the last table (or without any table) wins.
    if tmpl != nil and (table == nil or tablePos < tmplPos):
      return last_child_of(HTMLTemplateElement(tmpl).content)
    # No table on the stack: fall back to the first stack entry.
    if table == nil:
      return last_child_of(parser.openElements[0])
    # Table is attached somewhere: insert right before it.
    if table.parentNode != nil:
      return (table.parentNode, table)
    # Detached table: use the stack entry just below it.
    result = last_child_of(parser.openElements[tablePos - 1])
  if result.inside.nodeType == ELEMENT_NODE and Element(result.inside).tagType == TAG_TEMPLATE:
    result = (HTMLTemplateElement(result.inside).content, nil)

func appropriatePlaceForInsert(parser: HTML5Parser): AdjustedInsertionLocation =
  ## Convenience overload that uses the current node as the target.
  let target = parser.currentNode
  return parser.appropriatePlaceForInsert(target)

func hasElement(elements: seq[Element], tag: TagType): bool =
  ## True when any element in `elements` has tag type `tag`.
  elements.anyIt(it.tagType == tag)

func hasElementInSpecificScope(elements: seq[Element], target: Element, list: set[TagType]): bool =
  ## Scans `elements` from the last entry to the first. Returns true on
  ## reaching `target` itself, false on reaching an element whose tag type
  ## is in `list` first. Running off the stack trips an assertion.
  var idx = elements.high
  while idx >= 0:
    let candidate = elements[idx]
    if candidate == target:
      return true
    if candidate.tagType in list:
      return false
    dec idx
  assert false

func hasElementInSpecificScope(elements: seq[Element], target: TagType, list: set[TagType]): bool =
  ## Scans `elements` from the last entry to the first. Returns true on
  ## reaching an element of tag type `target`, false on reaching an element
  ## whose tag type is in `list` first. Running off the stack trips an
  ## assertion.
  var idx = elements.high
  while idx >= 0:
    let tagType = elements[idx].tagType
    if tagType == target:
      return true
    if tagType in list:
      return false
    dec idx
  assert false

func hasElementInSpecificScope(elements: seq[Element], target: set[TagType], list: set[TagType]): bool =
  ## Scans `elements` from the last entry to the first. Returns true on
  ## reaching an element whose tag type is in `target`, false on reaching
  ## one whose tag type is in `list` first. Running off the stack trips an
  ## assertion.
  var idx = elements.high
  while idx >= 0:
    let tagType = elements[idx].tagType
    if tagType in target:
      return true
    if tagType in list:
      return false
    dec idx
  assert false

# Tag types that end the search performed by the hasElementIn*Scope helpers.
const Scope = {
  TAG_APPLET, TAG_CAPTION, TAG_HTML, TAG_MARQUEE, TAG_OBJECT, TAG_TABLE,
  TAG_TD, TAG_TEMPLATE, TAG_TH
} #TODO SVG (NOTE MathML not implemented)
func hasElementInScope(elements: seq[Element], target: TagType): bool =
  ## Scope check for a tag type, bounded by the tag types in `Scope`.
  hasElementInSpecificScope(elements, target, Scope)

func hasElementInScope(elements: seq[Element], target: set[TagType]): bool =
  ## Scope check for any of several tag types, bounded by `Scope`.
  hasElementInSpecificScope(elements, target, Scope)

func hasElementInScope(elements: seq[Element], target: Element): bool =
  ## Scope check for one specific element, bounded by `Scope`.
  hasElementInSpecificScope(elements, target, Scope)

func hasElementInListItemScope(elements: seq[Element], target: TagType): bool =
  ## Like `hasElementInScope`, with ol and ul as additional boundaries.
  const ListItemScope = Scope + {TAG_OL, TAG_UL}
  hasElementInSpecificScope(elements, target, ListItemScope)

func hasElementInButtonScope(elements: seq[Element], target: TagType): bool =
  ## Like `hasElementInScope`, with button as an additional boundary.
  const ButtonScope = Scope + {TAG_BUTTON}
  hasElementInSpecificScope(elements, target, ButtonScope)

func hasElementInTableScope(elements: seq[Element], target: TagType): bool =
  ## Scope check for a tag type, bounded by html, table and template.
  const TableScope = {TAG_HTML, TAG_TABLE, TAG_TEMPLATE}
  hasElementInSpecificScope(elements, target, TableScope)

func hasElementInTableScope(elements: seq[Element], target: set[TagType]): bool =
  ## Scope check for any of several tag types, bounded by html, table and
  ## template.
  const TableScope = {TAG_HTML, TAG_TABLE, TAG_TEMPLATE}
  hasElementInSpecificScope(elements, target, TableScope)

func hasElementInSelectScope(elements: seq[Element], target: TagType): bool =
  ## Scans `elements` from the last entry to the first. Returns true on
  ## reaching an element of tag type `target`; any element other than option
  ## or optgroup met before that ends the search with false. Running off the
  ## stack trips an assertion.
  const Transparent = {TAG_OPTION, TAG_OPTGROUP}
  var idx = elements.high
  while idx >= 0:
    let tagType = elements[idx].tagType
    if tagType == target:
      return true
    if tagType notin Transparent:
      return false
    dec idx
  assert false

func createElement(parser: HTML5Parser, token: Token, namespace: Namespace, intendedParent: Node): Element =
  ## Creates an element for `token` in the document of `intendedParent`,
  ## copies the token's attributes onto it, resets it if it is resettable,
  ## and associates it with the parser's form element when the conditions
  ## below hold.
  #TODO custom elements
  let ownerDocument = intendedParent.document
  let created = ownerDocument.newHTMLElement(token.tagname, namespace, tagType = token.tagtype)
  created.appendAttributes(token.attrs)
  if created.isResettable():
    created.resetElement()

  # Form association: a supported form-associated element, a form element
  # is set, no template is open, the element is not listed or carries no
  # "form" attribute, and the parent shares a tree with the form.
  let associate =
    created.tagType in SupportedFormAssociatedElements and parser.form != nil and
    not parser.openElements.hasElement(TAG_TEMPLATE) and
    (created.tagType notin ListedElements or not created.attrb("form")) and
    intendedParent.inSameTree(parser.form)
  if associate:
    let formElement = FormAssociatedElement(created)
    formElement.setForm(parser.form)
    formElement.parserInserted = true
  return created

proc insert(location: AdjustedInsertionLocation, node: Node) =
  ## Inserts `node` at `location`: as a child of `location.inside`, with
  ## `location.before` passed as the reference node.
  let (parent, reference) = location
  parent.insert(node, reference)

proc insertForeignElement(parser: var HTML5Parser, token: Token, namespace: Namespace): Element =
  ## Creates an element for `token` in `namespace`, inserts it at the
  ## appropriate place when the pre-insertion validity check passes, and
  ## pushes it onto the stack of open elements in either case.
  let location = parser.appropriatePlaceForInsert()
  let created = parser.createElement(token, namespace, location.inside)
  let insertable = location.inside.preInsertionValidity(created, location.before)
  if insertable:
    #TODO custom elements
    location.insert(created)
  parser.openElements.add(created)
  return created

proc insertHTMLElement(parser: var HTML5Parser, token: Token): Element =
  ## Inserts an element for `token` in the HTML namespace.
  result = insertForeignElement(parser, token, Namespace.HTML)

proc adjustSVGAttributes(token: Token) =
  ## Renames the token's lowercased SVG attribute names to their mixed-case
  ## spelling (e.g. "viewbox" -> "viewBox"). The lowercase entry is removed
  ## so that the attribute is renamed rather than duplicated.
  const adjusted = {
    "attributename": "attributeName",
    "attributetype": "attributeType",
    "basefrequency": "baseFrequency",
    "baseprofile": "baseProfile",
    "calcmode": "calcMode",
    "clippathunits": "clipPathUnits",
    "diffuseconstant": "diffuseConstant",
    "edgemode": "edgeMode",
    "filterunits": "filterUnits",
    "glyphref": "glyphRef",
    "gradienttransform": "gradientTransform",
    "gradientunits": "gradientUnits",
    "kernelmatrix": "kernelMatrix",
    "kernelunitlength": "kernelUnitLength",
    "keypoints": "keyPoints",
    "keysplines": "keySplines",
    "keytimes": "keyTimes",
    "lengthadjust": "lengthAdjust",
    "limitingconeangle": "limitingConeAngle",
    "markerheight": "markerHeight",
    "markerunits": "markerUnits",
    "markerwidth": "markerWidth",
    "maskcontentunits": "maskContentUnits",
    "maskunits": "maskUnits",
    "numoctaves": "numOctaves",
    "pathlength": "pathLength",
    "patterncontentunits": "patternContentUnits",
    "patterntransform": "patternTransform",
    "patternunits": "patternUnits",
    "pointsatx": "pointsAtX",
    "pointsaty": "pointsAtY",
    "pointsatz": "pointsAtZ",
    "preservealpha": "preserveAlpha",
    "preserveaspectratio": "preserveAspectRatio",
    "primitiveunits": "primitiveUnits",
    "refx": "refX",
    "refy": "refY",
    "repeatcount": "repeatCount",
    "repeatdur": "repeatDur",
    "requiredextensions": "requiredExtensions",
    "requiredfeatures": "requiredFeatures",
    "specularconstant": "specularConstant",
    "specularexponent": "specularExponent",
    "spreadmethod": "spreadMethod",
    "startoffset": "startOffset",
    "stddeviation": "stdDeviation",
    "stitchtiles": "stitchTiles",
    "surfacescale": "surfaceScale",
    "systemlanguage": "systemLanguage",
    "tablevalues": "tableValues",
    "targetx": "targetX",
    "targety": "targetY",
    "textlength": "textLength",
    "viewbox": "viewBox",
    "viewtarget": "viewTarget",
    "xchannelselector": "xChannelSelector",
    "ychannelselector": "yChannelSelector",
    "zoomandpan": "zoomAndPan",
  }.toTable()
  # Collect the keys first: the table must not be modified while iterating.
  var todo: seq[string]
  for k in token.attrs.keys:
    if k in adjusted:
      todo.add(k)
  for s in todo:
    token.attrs[adjusted[s]] = token.attrs[s]
    # Drop the old lowercase name; every replacement differs from its key.
    token.attrs.del(s)

# Shared body of the insertCharacter overloads. Appends `data` to the text
# node that directly precedes the insertion location, or inserts a new text
# node there. NOTE: the `return` below exits the proc that expands this
# template.
template insert_character_impl(parser: var HTML5Parser, data: typed) =
  let location = parser.appropriatePlaceForInsert()
  # Nothing is inserted when the location is a document node.
  if location.inside.nodeType == DOCUMENT_NODE:
    return
  # Node that currently sits right before the insertion point.
  let insertNode = if location.before == nil:
    location.inside.lastChild
  else:
    location.before.previousSibling
  if insertNode != nil and insertNode.nodeType == TEXT_NODE:
    dom.Text(insertNode).data &= data
  else:
    let text = location.inside.document.newText($data)
    location.insert(text)

  # Text went into a style element: flag its sheet as invalid.
  if location.inside.nodeType == ELEMENT_NODE:
    let parent = Element(location.inside)
    if parent.tagType == TAG_STYLE:
      let parent = HTMLStyleElement(parent)
      parent.sheet_invalid = true

proc insertCharacter(parser: var HTML5Parser, data: string) =
  ## Inserts the string `data` as character data at the appropriate place.
  parser.insert_character_impl(data)

proc insertCharacter(parser: var HTML5Parser, data: char) =
  ## Inserts the single char `data` as character data at the appropriate
  ## place.
  parser.insert_character_impl(data)

proc insertCharacter(parser: var HTML5Parser, data: Rune) =
  ## Inserts the rune `data` as character data at the appropriate place.
  parser.insert_character_impl(data)

proc insertComment(parser: var HTML5Parser, token: Token, position: AdjustedInsertionLocation) =
  ## Creates a comment node holding `token.data` and inserts it at
  ## `position`.
  let ownerDocument = position.inside.document
  position.insert(ownerDocument.newComment(token.data))

proc insertComment(parser: var HTML5Parser, token: Token) =
  ## Creates a comment node holding `token.data` and inserts it at the
  ## appropriate place for the current node.
  let position = parser.appropriatePlaceForInsert()
  parser.insertComment(token, position)

# Public identifiers that quirksConditions matches as whole strings.
const PublicIdentifierEquals = [
  "-//W3O//DTD W3 HTML Strict 3.0//EN//",
  "-/W3C/DTD HTML 4.0 Transitional/EN",
  "HTML" 
]

# Public identifier prefixes that quirksConditions matches with
# startsWithNoCase.
const PublicIdentifierStartsWith = [
  "+//Silmaril//dtd html Pro v0r11 19970101//",
  "-//AS//DTD HTML 3.0 asWedit + extensions//",
  "-//AdvaSoft Ltd//DTD HTML 3.0 asWedit + extensions//",
  "-//IETF//DTD HTML 2.0 Level 1//",
  "-//IETF//DTD HTML 2.0 Level 2//",
  "-//IETF//DTD HTML 2.0 Strict Level 1//",
  "-//IETF//DTD HTML 2.0 Strict Level 2//",
  "-//IETF//DTD HTML 2.0 Strict//",
  "-//IETF//DTD HTML 2.0//",
  "-//IETF//DTD HTML 2.1E//",
  "-//IETF//DTD HTML 3.0//",
  "-//IETF//DTD HTML 3.2 Final//",
  "-//IETF//DTD HTML 3.2//",
  "-//IETF//DTD HTML 3//",
  "-//IETF//DTD HTML Level 0//",
  "-//IETF//DTD HTML Level 1//",
  "-//IETF//DTD HTML Level 2//",
  "-//IETF//DTD HTML Level 3//",
  "-//IETF//DTD HTML Strict Level 0//",
  "-//IETF//DTD HTML Strict Level 1//",
  "-//IETF//DTD HTML Strict Level 2//",
  "-//IETF//DTD HTML Strict Level 3//",
  "-//IETF//DTD HTML Strict//",
  "-//IETF//DTD HTML//",
  "-//Metrius//DTD Metrius Presentational//",
  "-//Microsoft//DTD Internet Explorer 2.0 HTML Strict//",
  "-//Microsoft//DTD Internet Explorer 2.0 HTML//",
  "-//Microsoft//DTD Internet Explorer 2.0 Tables//",
  "-//Microsoft//DTD Internet Explorer 3.0 HTML Strict//",
  "-//Microsoft//DTD Internet Explorer 3.0 HTML//",
  "-//Microsoft//DTD Internet Explorer 3.0 Tables//",
  "-//Netscape Comm. Corp.//DTD HTML//",
  "-//Netscape Comm. Corp.//DTD Strict HTML//",
  "-//O'Reilly and Associates//DTD HTML 2.0//",
  "-//O'Reilly and Associates//DTD HTML Extended 1.0//",
  "-//O'Reilly and Associates//DTD HTML Extended Relaxed 1.0//",
  "-//SQ//DTD HTML 2.0 HoTMetaL + extensions//",
  "-//SoftQuad Software//DTD HoTMetaL PRO 6.0::19990601::extensions to HTML 4.0//",
  "-//SoftQuad//DTD HoTMetaL PRO 4.0::19971010::extensions to HTML 4.0//",
  "-//Spyglass//DTD HTML 2.0 Extended//",
  "-//Sun Microsystems Corp.//DTD HotJava HTML//",
  "-//Sun Microsystems Corp.//DTD HotJava Strict HTML//",
  "-//W3C//DTD HTML 3 1995-03-24//",
  "-//W3C//DTD HTML 3.2 Draft//",
  "-//W3C//DTD HTML 3.2 Final//",
  "-//W3C//DTD HTML 3.2//",
  "-//W3C//DTD HTML 3.2S Draft//",
  "-//W3C//DTD HTML 4.0 Frameset//",
  "-//W3C//DTD HTML 4.0 Transitional//",
  "-//W3C//DTD HTML Experimental 19960712//",
  "-//W3C//DTD HTML Experimental 970421//",
  "-//W3C//DTD W3 HTML//",
  "-//W3O//DTD W3 HTML 3.0//",
  "-//WebTechs//DTD Mozilla HTML 2.0//",
  "-//WebTechs//DTD Mozilla HTML//", 
]

# Public identifier prefixes that quirksConditions only checks when the
# token has no system identifier.
const SystemIdentifierMissingAndPublicIdentifierStartsWith = [
  "-//W3C//DTD HTML 4.01 Frameset//",
  "-//W3C//DTD HTML 4.01 Transitional//"
]

# Public identifier prefixes that limitedQuirksConditions accepts regardless
# of the system identifier.
const PublicIdentifierStartsWithLimited = [
  "-//W3C//DTD XHTML 1.0 Frameset//",
  "-//W3C//DTD XHTML 1.0 Transitional//"
]

# Public identifier prefixes that limitedQuirksConditions only checks when
# the token does have a system identifier.
const SystemIdentifierNotMissingAndPublicIdentifierStartsWith = [
  "-//W3C//DTD HTML 4.01 Frameset//",
  "-//W3C//DTD HTML 4.01 Transitional//"
]

func quirksConditions(token: Token): bool =
  ## Decides whether a DOCTYPE token puts the document into quirks mode.
  ## True when the token's quirks flag is set, its name is missing or not
  ## "html", or its public/system identifiers match one of the known legacy
  ## values. Identifier comparisons ignore case.
  func equalsNoCase(s, id: string): bool =
    # Equal length plus a case-insensitive prefix match is a
    # case-insensitive equality test.
    # NOTE(review): relies on startsWithNoCase folding case on both sides.
    s.len == id.len and s.startsWithNoCase(id)

  if token.quirks: return true
  if token.name.isnone or token.name.get != "html": return true
  if token.sysid.issome:
    if token.sysid.get.equalsNoCase("http://www.ibm.com/data/dtd/v11/ibmxhtml1-transitional.dtd"):
      return true
  if token.pubid.issome:
    let pubid = token.pubid.get
    for id in PublicIdentifierEquals:
      if pubid.equalsNoCase(id):
        return true
    for id in PublicIdentifierStartsWith:
      if pubid.startsWithNoCase(id):
        return true
    if token.sysid.isnone:
      for id in SystemIdentifierMissingAndPublicIdentifierStartsWith:
        if pubid.startsWithNoCase(id):
          return true
  return false

func limitedQuirksConditions(token: Token): bool =
  ## Returns true when the DOCTYPE `token` must put the document into
  ## limited-quirks mode: its public identifier starts with an entry of
  ## `PublicIdentifierStartsWithLimited`, or a system identifier is present
  ## and the public identifier starts with an entry of
  ## `SystemIdentifierNotMissingAndPublicIdentifierStartsWith`.
  ## A token without a public identifier never matches.
  if token.pubid.isnone:
    return false
  let pubid = token.pubid.get
  for prefix in PublicIdentifierStartsWithLimited:
    if pubid.startsWithNoCase(prefix):
      return true
  if token.sysid.issome:
    # This list only counts when a system identifier was given.
    for prefix in SystemIdentifierNotMissingAndPublicIdentifierStartsWith:
      if pubid.startsWithNoCase(prefix):
        return true
  return false

# 13.2.6.2
proc genericRawtextElementParsingAlgorithm(parser: var HTML5Parser, token: Token) =
  ## Inserts an HTML element for `token`, puts the tokenizer into the RAWTEXT
  ## state and switches to the TEXT insertion mode. The insertion mode that
  ## was active on entry is saved in `oldInsertionMode`.
  discard insertHTMLElement(parser, token)
  parser.tokenizer.state = RAWTEXT
  # Save the mode to return to before overwriting it.
  let returnMode = parser.insertionMode
  parser.oldInsertionMode = returnMode
  parser.insertionMode = TEXT

proc genericRCDATAElementParsingAlgorithm(parser: var HTML5Parser, token: Token) =
  ## Inserts an HTML element for `token`, puts the tokenizer into the RCDATA
  ## state and switches to the TEXT insertion mode. The insertion mode that
  ## was active on entry is saved in `oldInsertionMode`.
  discard insertHTMLElement(parser, token)
  parser.tokenizer.state = RCDATA
  # Save the mode to return to before overwriting it.
  let returnMode = parser.insertionMode
  parser.oldInsertionMode = returnMode
  parser.insertionMode = TEXT

proc popElement(parser: var HTML5Parser): Element =
  ## Removes the last element from the stack of open elements and returns it.
  ## When the removed element is a `textarea`, `resetElement` is called on it
  ## before returning.
  result = pop(parser.openElements)
  if result.tagType != TAG_TEXTAREA:
    return
  resetElement(result)

# 13.2.6.3
proc generateImpliedEndTags(parser: var HTML5Parser) =
  ## Pops elements off the stack of open elements for as long as the current
  ## node is a dd, dt, li, optgroup, option, p, rb, rp, rt or rtc element.
  const impliedEndTags = {TAG_DD, TAG_DT, TAG_LI, TAG_OPTGROUP, TAG_OPTION,
                          TAG_P, TAG_RB, TAG_RP, TAG_RT, TAG_RTC}
  while true:
    if parser.currentNode.tagType notin impliedEndTags:
      break
    discard popElement(parser)

proc generateImpliedEndTags(parser: var HTML5Parser, exclude: TagType) =
  ## Same as the parameterless overload, except that popping also stops as
  ## soon as the current node has the tag type `exclude`.
  const impliedEndTags = {TAG_DD, TAG_DT, TAG_LI, TAG_OPTGROUP, TAG_OPTION,
                          TAG_P, TAG_RB, TAG_RP, TAG_RT, TAG_RTC}
  while true:
    let tagType = parser.currentNode.tagType
    # Equivalent to testing membership in `impliedEndTags - {exclude}`.
    if tagType == exclude or tagType notin impliedEndTags:
      break
    discard popElement(parser)

proc generateImpliedEndTagsThoroughly(parser: var HTML5Parser) =
  ## Pops elements off the stack of open elements for as long as the current
  ## node is one of: caption, colgroup, dd, dt, li, optgroup, option, p, rb,
  ## rp, rt, rtc, tbody, td, tfoot, th, thead or tr.
  const thoroughEndTags = {TAG_CAPTION, TAG_COLGROUP, TAG_DD, TAG_DT, TAG_LI,
                           TAG_OPTGROUP, TAG_OPTION, TAG_P, TAG_RB, TAG_RP,
                           TAG_RT, TAG_RTC, TAG_TBODY, TAG_TD, TAG_TFOOT,
                           TAG_TH, TAG_THEAD, TAG_TR}
  while true:
    if parser.currentNode.tagType notin thoroughEndTags:
      break
    discard popElement(parser)

# 13.2.4.3
proc pushOntoActiveFormatting(parser: var HTML5Parser, element: Element, token: Token) =
  ## Appends `(element, token)` to the list of active formatting elements.
  ##
  ## Before appending, the list is scanned backwards up to the nearest marker
  ## (an entry whose element is nil). If three entries equivalent to `element`
  ## are found, the earliest of those three is removed. Two elements are
  ## equivalent when they share tag type (and local name, for unknown tags),
  ## namespace and the exact same attribute name/value pairs.

  proc sameAttributes(a, b: Element): bool =
    # Every attribute of `a` must exist on `b` with an equal value...
    for name, value in a.attributes:
      if name notin b.attributes or value != b.attributes[name]:
        return false
    # ...and `b` must not carry any attribute that `a` lacks.
    for name, value in b.attributes:
      if name notin a.attributes:
        return false
    return true

  var matches = 0
  for idx in countdown(parser.activeFormatting.high, 0):
    let candidate = parser.activeFormatting[idx][0]
    if candidate == nil:
      break # reached a marker; earlier entries are not considered
    if candidate.tagType != element.tagType:
      continue
    if candidate.tagType == TAG_UNKNOWN and candidate.localName != element.localName:
      continue
    if candidate.namespace != element.namespace:
      continue
    if not sameAttributes(candidate, element):
      continue
    inc matches
    if matches == 3:
      # Scanning backwards, the third match is the earliest one in the list.
      parser.activeFormatting.delete(idx)
      break
  parser.activeFormatting.add((element, token))

proc reconstructActiveFormatting(parser: var HTML5Parser) =
  ## Reconstructs the active formatting elements.
  ##
  ## Does nothing when the list of active formatting elements is empty, or
  ## when its last entry is a marker (nil element) or an element that is
  ## still on the stack of open elements. Otherwise it walks back to the
  ## entry just after the nearest marker / still-open element (or to the
  ## start of the list) and, from there to the end of the list, inserts a
  ## fresh HTML element from each entry's token and stores it in the entry.
  ##
  ## "On the stack" is decided by element identity. Checking only the tag
  ## type would wrongly treat an entry as open whenever some other element
  ## with the same tag happens to be on the stack.
  if parser.activeFormatting.len == 0:
    return
  let last = parser.activeFormatting[^1][0]
  if last == nil or parser.openElements.find(last) >= 0:
    return
  var i = parser.activeFormatting.high
  # Rewind: step back until hitting a marker or an entry that is still open,
  # then advance once so that `i` is the first entry needing reconstruction.
  # If the start of the list is reached instead, reconstruction starts at 0.
  while i > 0:
    dec i
    let entry = parser.activeFormatting[i][0]
    if entry == nil or parser.openElements.find(entry) >= 0:
      inc i
      break
  # Create: re-insert every entry from `i` through the end of the list.
  while true:
    let token = parser.activeFormatting[i][1]
    let element = parser.insertHTMLElement(token)
    parser.activeFormatting[i] = (element, token)
    if i == parser.activeFormatting.high:
      break
    inc i

proc clearActiveFormattingTillMarker(parser: var HTML5Parser) =
  ## Pops entries off the end of the list of active formatting elements until
  ## a marker (an entry whose element is nil) has been popped or the list is
  ## empty. The marker itself is removed as well.
  while parser.activeFormatting.len > 0:
    let popped = parser.activeFormatting.pop()
    if popped[0] == nil:
      break

# Pops the current node via popElement and discards the returned element.
# `parser` is not a parameter: it is resolved at the place where the template
# is used, so a `parser` variable must be in scope there.
template pop_current_node = discard parser.popElement()

func isHTMLIntegrationPoint(node: Element): bool =
  ## Placeholder: currently reports false for every element.
  #TODO SVG (NOTE MathML not implemented)
  result = false

# Following is an implementation of the state (?) machine defined in
# https://html.spec.whatwg.org/multipage/parsing.html#parsing-main-inhtml
# It uses the ad-hoc pattern matching macro `match' to apply the following
# transformations:
# * First, pairs of patterns and actions are stored in tuples (and `discard'
#   statements...)
# * These pairs are then assigned to token types, later mapped to legs of the
#   first case statement.
# * Another case statement is constructed where needed, e.g. for switching on
#   characters/tags/etc.
# * Finally, the whole thing is wrapped in a named block, to implement a
#   pseudo-goto by breaking out only when the else statement needn't be
#   executed.
#
# e.g. the following code:
#
#   match token:
#     TokenType.COMMENT => (block: echo "comment")
#     ("<p>", "<a>", "</div>") => (block: echo "p, a or closing div")
#     ("<div>", "</p>") => (block: anything_else)
#     (TokenType.START_TAG, TokenType.END_TAG) => (block: assert false, "invalid")
#     _ => (block: echo "anything else")
#
# (effectively) generates this:
#
#   block inside_not_else:
#     case token.t
#     of TokenType.COMMENT:
#       echo "comment"
#       break inside_not_else
#     of TokenType.START_TAG:
#       case token.tagtype
#       of {TAG_P, TAG_A}:
#         echo "p, a or closing div"
#         break inside_not_else
#       of TAG_DIV: discard
#       else:
#         assert false
#         break inside_not_else
#     of TokenType.END_TAG:
#       case token.tagtype
#       of TAG_DIV:
#         echo "p, a or closing div"
#         break inside_not_else
#       of TAG_P: discard
#       else:
#         assert false
#         break inside_not_else
#     else: discard
#     echo "anything else"
#
# This duplicates any code that applies for several token types, except for the
# else branch.
macro match(token: Token, body: typed): untyped =
  ## Builds a nested `case` statement over `token` from a list of
  ## `pattern => action` arms (each arm arrives as a `discard (pattern, proc)`
  ## statement, see the `=>` macro at the use site).
  ##
  ## Supported patterns:
  ## * a symbol: taken to be a `TokenType` member; the action becomes the
  ##   default action for that token type,
  ## * a character literal or a set whose first item is a character literal:
  ##   matched against CHARACTER_ASCII tokens,
  ## * a string literal: tokenized at compile time; its first token must be a
  ##   start or end tag, which is matched by tag type,
  ## * a `discard` statement (the `_` template): the fall-through action that
  ##   is placed after the generated `case`,
  ## * a tuple of any of the above.
  ##
  ## Every action except the fall-through one, and except actions consisting
  ## solely of `anything_else`, is followed by `break inside_not_else`, so
  ## that the fall-through action only runs when no arm handled the token.
  type OfBranchStore = object
    ofBranches: seq[(seq[NimNode], NimNode)]
    defaultBranch: NimNode
    painted: bool

  # Stores 'of' branches
  var ofBranches: array[TokenType, OfBranchStore]
  # Stores 'else', 'elif' branches
  var defaultBranch: NimNode

  # Maps the textual name of every TokenType member to its value.
  const tokenTypes = (func(): Table[string, TokenType] =
    for tt in TokenType:
      result[$tt] = tt)()

  for disc in body:
    let tup = disc[0] # access actual tuple
    let pattern = `tup`[0]
    let lambda = `tup`[1]
    # The first child that is not a symbol, empty node or parameter list is
    # used as the action.
    var action = lambda.findChild(it.kind notin {nnkSym, nnkEmpty, nnkFormalParams})
    if pattern.kind != nnkDiscardStmt and not (action.len == 2 and action[1].kind == nnkDiscardStmt and action[1][0] == newStrLitNode("anything_else")):
      action = quote do:
        `action`
        #eprint token #debug
        break inside_not_else

    # Work list; tuple patterns push their members back onto it.
    var patterns = @[pattern]
    while patterns.len > 0:
      let pattern = patterns.pop()
      case pattern.kind
      of nnkSym: # simple symbols; we assume these are the enums
        ofBranches[tokenTypes[pattern.strVal]].defaultBranch = action
        ofBranches[tokenTypes[pattern.strVal]].painted = true
      of nnkCharLit:
        ofBranches[CHARACTER_ASCII].ofBranches.add((@[pattern], action))
        ofBranches[CHARACTER_ASCII].painted = true
      of nnkCurly:
        case pattern[0].kind
        of nnkCharLit:
          ofBranches[CHARACTER_ASCII].ofBranches.add((@[pattern], action))
          ofBranches[CHARACTER_ASCII].painted = true
        else: error fmt"Unsupported curly of kind {pattern[0].kind}"
      of nnkStrLit:
        var tempTokenizer = newTokenizer(newStringStream(pattern.strVal))
        for token in tempTokenizer.tokenize:
          case token.t
          of START_TAG, END_TAG:
            # Only read the tag type once the token is known to be a tag.
            let tt = int(token.tagtype)
            var found = false
            # Arms sharing the same action are merged into one 'of' branch.
            for i in 0..ofBranches[token.t].ofBranches.high:
              if ofBranches[token.t].ofBranches[i][1] == action:
                found = true
                ofBranches[token.t].ofBranches[i][0].add((quote do: TagType(`tt`)))
                ofBranches[token.t].painted = true
                break
            if not found:
              ofBranches[token.t].ofBranches.add((@[(quote do: TagType(`tt`))], action))
              ofBranches[token.t].painted = true
          else: error fmt"{pattern.strVal}: Unsupported token {token} of kind {token.t}"
          break # only the first token of the pattern string is used
      of nnkDiscardStmt:
        defaultBranch = action
      of nnkTupleConstr:
        for child in pattern:
          patterns.add(child)
      # `repr` is used because `$` is only defined for a few NimNode kinds and
      # would raise its own error here, hiding this message.
      else: error fmt"{pattern.repr}: Unsupported pattern of kind {pattern.kind}"

  # Expression the inner case statement of a token type switches on.
  func tokenBranchOn(tok: TokenType): NimNode =
    case tok
    of START_TAG, END_TAG:
      return quote do: token.tagtype
    of CHARACTER:
      return quote do: token.r
    of CHARACTER_ASCII:
      return quote do: token.c
    else: error fmt"Unsupported branching of token {tok}"

  # Appends `branch` to the `tokenCase` visible at the call site; several
  # patterns sharing one action are emitted as a single set.
  template add_to_case(branch: typed) =
    if branch[0].len == 1:
      tokenCase.add(newNimNode(nnkOfBranch).add(branch[0][0]).add(branch[1]))
    else:
      var curly = newNimNode(nnkCurly)
      for node in branch[0]:
        curly.add(node)
      tokenCase.add(newNimNode(nnkOfBranch).add(curly).add(branch[1]))

  # Build case statements
  var mainCase = newNimNode(nnkCaseStmt).add(quote do: `token`.t)
  for tt in TokenType:
    let ofBranch = newNimNode(nnkOfBranch).add(quote do: TokenType(`tt`))
    let tokenCase = newNimNode(nnkCaseStmt)
    if ofBranches[tt].defaultBranch != nil:
      if ofBranches[tt].ofBranches.len > 0:
        tokenCase.add(tokenBranchOn(tt))
        for branch in ofBranches[tt].ofBranches:
          add_to_case branch
        tokenCase.add(newNimNode(nnkElse).add(ofBranches[tt].defaultBranch))
        ofBranch.add(tokenCase)
        mainCase.add(ofBranch)
      else:
        ofBranch.add(ofBranches[tt].defaultBranch)
        mainCase.add(ofBranch)
    else:
      if ofBranches[tt].ofBranches.len > 0:
        tokenCase.add(tokenBranchOn(tt))
        for branch in ofBranches[tt].ofBranches:
          add_to_case branch
        ofBranch.add(tokenCase)
        tokenCase.add(newNimNode(nnkElse).add(quote do: discard))
        mainCase.add(ofBranch)
      else:
        discard

  # The outer case needs an 'else' as soon as one token type has no arm.
  for t in TokenType:
    if not ofBranches[t].painted:
      mainCase.add(newNimNode(nnkElse).add(quote do: discard))
      break

  var stmts = newStmtList().add(mainCase)
  # A match without a `_` arm has no fall-through action to append.
  if defaultBranch != nil:
    for stmt in defaultBranch:
      stmts.add(stmt)
  result = newBlockStmt(ident("inside_not_else"), stmts)

proc processInHTMLContent(parser: var HTML5Parser, token: Token, insertionMode = parser.insertionMode) =
  template pop_all_nodes =
    while parser.openElements.len > 1: pop_current_node
  template anything_else = discard "anything_else"
  macro `=>`(v: typed, body: untyped): untyped =
    quote do:
      discard (`v`, proc() = `body`)
  template _ = discard
  template reprocess(tok: Token) =
    parser.processInHTMLContent(tok)

  case insertionMode
  of INITIAL:
    match token:
      AsciiWhitespace => (block: discard)
      TokenType.COMMENT => (block: parser.insertComment(token, last_child_of(parser.document)))
      TokenType.DOCTYPE => (block:
        if token.name.isnone or token.name.get != "html" or token.pubid.issome or (token.sysid.issome and token.sysid.get != "about:legacy-compat"):
          parse_error
        let doctype = parser.document.newDocumentType(token.name.get(""), token.pubid.get(""), token.sysid.get(""))
        parser.document.append(doctype)
        if not parser.document.is_iframe_srcdoc and not parser.document.parser_cannot_change_the_mode_flag:
          if quirksConditions(token):
            parser.document.mode = QUIRKS
          elif limitedQuirksConditions(token):
            parser.document.mode = LIMITED_QUIRKS
        parser.insertionMode = BEFORE_HTML
      )
      _ => (block:
        if not parser.document.is_iframe_srcdoc:
          parse_error
        if not parser.document.parser_cannot_change_the_mode_flag:
          parser.document.mode = QUIRKS
        parser.insertionMode = BEFORE_HTML
        reprocess token
      )

  of BEFORE_HTML:
    match token:
      TokenType.DOCTYPE => (block: parse_error)
      TokenType.COMMENT => (block: parser.insertComment(token, last_child_of(parser.document)))
      AsciiWhitespace => (block: discard)
      "<html>" => (block:
        let element = parser.createElement(token, Namespace.HTML, parser.document)
        parser.document.append(element)
        parser.openElements.add(element)
        parser.insertionMode = BEFORE_HEAD
      )
      ("</head>", "</body>", "</html>", "</br>") => (block: anything_else)
      TokenType.END_TAG => (block: parse_error)
      _ => (block:
        let element = parser.document.newHTMLElement(TAG_HTML, Namespace.HTML)
        parser.document.append(element)
        parser.openElements.add(element)
        parser.insertionMode = BEFORE_HEAD
        reprocess token
      )

  of BEFORE_HEAD:
    match token:
      AsciiWhitespace => (block: discard)
      TokenType.COMMENT => (block: parser.insertComment(token))
      TokenType.DOCTYPE => (block: parse_error)
      "<html>" => (block: parser.processInHTMLContent(token, IN_BODY))
      "<head>" => (block:
        parser.head = parser.insertHTMLElement(token)
        parser.insertionMode = IN_HEAD
      )
      ("</head>", "</body>", "</html>", "</br>") => (block: anything_else)
      TokenType.END_TAG => (block: parse_error)
      _ => (block:
        parser.head = parser.insertHTMLElement(Token(t: START_TAG, tagtype: TAG_HEAD))
        parser.insertionMode = IN_HEAD
        reprocess token
      )

  of IN_HEAD:
    match token:
      AsciiWhitespace => (block: discard)
      TokenType.COMMENT => (block: parser.insertComment(token))
      TokenType.DOCTYPE => (block: parse_error)
      "<html>" => (block: parser.processInHTMLContent(token, IN_BODY))
      ("<base>", "<basefont>", "<bgsound>", "<link>") => (block:
        discard parser.insertHTMLElement(token)
        pop_current_node
      )
      "<meta>" => (block:
        discard parser.insertHTMLElement(token)
        pop_current_node
        #TODO encodings
      )
      "<title>" => (block: parser.genericRCDATAElementParsingAlgorithm(token))
      "<noscript>" => (block:
        if not parser.scripting:
          discard parser.insertHTMLElement(token)
          parser.insertionMode = IN_HEAD_NOSCRIPT
        else:
          parser.genericRawtextElementParsingAlgorithm(token)
      )
      ("<noframes>", "<style>") => (block: parser.genericRawtextElementParsingAlgorithm(token))
      "<script>" => (block:
        let location = parser.appropriatePlaceForInsert()
        let element = HTMLScriptElement(parser.createElement(token, Namespace.HTML, location.inside))
        element.parserDocument = parser.document
        element.forceAsync = false
        if parser.fragment:
          element.alreadyStarted = true
        #TODO document.write (?)
        location.insert(element)
        parser.openElements.add(element)
        parser.tokenizer.state = SCRIPT_DATA
        parser.oldInsertionMode = parser.insertionMode
        parser.insertionMode = TEXT
      )
      "</head>" => (block:
        pop_current_node
        parser.insertionMode = AFTER_HEAD
      )
      ("</body>", "</html>", "</br>") => (block: anything_else)
      "<template>" => (block:
        discard parser.insertHTMLElement(token)
        parser.activeFormatting.add((nil, nil))
        parser.framesetok = false
        parser.insertionMode = IN_TEMPLATE
        parser.templateModes.add(IN_TEMPLATE)
      )
      "</template>" => (block:
        if not parser.openElements.hasElement(TAG_TEMPLATE):
          parse_error
        else:
          parser.generateImpliedEndTagsThoroughly()
          if parser.currentNode.tagType != TAG_TEMPLATE:
            parse_error
          while parser.popElement().tagType != TAG_TEMPLATE: discard
          parser.clearActiveFormattingTillMarker()
          discard parser.templateModes.pop()
          parser.resetInsertionMode()
      )
      ("<head>", TokenType.END_TAG) => (block: parse_error)
      _ => (block:
        pop_current_node
        parser.insertionMode = AFTER_HEAD
        reprocess token
      )

  of IN_HEAD_NOSCRIPT:
    match token:
      TokenType.DOCTYPE => (block: parse_error)
      "<html>" => (block: parser.processInHTMLContent(token, IN_BODY))
      "</noscript>" => (block:
        pop_current_node
        parser.insertionMode = IN_HEAD
      )
      (AsciiWhitespace,
       TokenType.COMMENT,
       "<basefont>", "<bgsound>", "<link>", "<meta>", "<noframes>", "<style>") => (block:
        parser.processInHTMLContent(token, IN_HEAD))
      "</br>" => (block: anything_else)
      ("<head>", "<noscript>") => (block: parse_error)
      TokenType.END_TAG => (block: parse_error)
      _ => (block:
        pop_current_node
        parser.insertionMode = IN_HEAD
        reprocess token
      )

  of AFTER_HEAD:
    match token:
      AsciiWhitespace => (block: parser.insertCharacter(token.c))
      TokenType.COMMENT => (block: parser.insertComment(token))
      TokenType.DOCTYPE => (block: parse_error)
      "<html>" => (block: parser.processInHTMLContent(token, IN_BODY))
      "<body>" => (block:
        discard parser.insertHTMLElement(token)
        parser.framesetok = false
        parser.insertionMode = IN_BODY
      )
      "<frameset>" => (block:
        discard parser.insertHTMLElement(token)
        parser.insertionMode = IN_FRAMESET
      )
      ("<base>", "<basefont>", "<bgsound>", "<link>", "<meta>", "<noframes>", "<script>", "<style>", "<template>", "<title>") => (block:
        parse_error
        parser.openElements.add(parser.head)
        parser.processInHTMLContent(token, IN_HEAD)
        for i in countdown(parser.openElements.high, 0):
          if parser.openElements[i] == parser.head:
            parser.openElements.delete(i)
      )
      "</template>" => (block: parser.processInHTMLContent(token, IN_HEAD))
      ("</body>", "</html>", "</br>") => (block: anything_else)
      ("<head>", TokenType.END_TAG) => (block: parse_error)
      _ => (block:
        discard parser.insertHTMLElement(Token(t: START_TAG, tagtype: TAG_BODY))
        parser.insertionMode = IN_BODY
        reprocess token
      )

  of IN_BODY:
    proc closeP(parser: var HTML5Parser) =
      parser.generateImpliedEndTags(TAG_P)
      if parser.currentNode.tagType != TAG_P: parse_error
      while parser.popElement().tagType != TAG_P: discard

    proc adoptionAgencyAlgorithm(parser: var HTML5Parser, token: Token): bool =
      if parser.currentNode.tagType != TAG_UNKNOWN and parser.currentNode.tagtype == token.tagtype or parser.currentNode.localName == token.tagname:
        var fail = true
        for it in parser.activeFormatting:
          if it[0] == parser.currentNode:
            fail = false
        if fail:
          pop_current_node
          return false
      var i = 0
      while true:
        if i >= 8: return false
        inc i
        if parser.activeFormatting.len == 0: return true
        var formatting: Element
        var formattingIndex: int
        for j in countdown(parser.activeFormatting.high, 0):
          let element = parser.activeFormatting[j][0]
          if element == nil:
            return true
          if element.tagType != TAG_UNKNOWN and element.tagtype == token.tagtype or element.qualifiedName == token.tagname:
            formatting = element
            formattingIndex = j
            break
          if j == 0:
            return true
        let stackIndex = parser.openElements.find(formatting)
        if stackIndex < 0:
          parse_error
          parser.activeFormatting.delete(formattingIndex)
          return false
        if not parser.openElements.hasElementInScope(formatting):
          parse_error
          return false
        if formatting != parser.currentNode: parse_error
        var furthestBlock: Element = nil
        var furthestBlockIndex: int
        for j in countdown(parser.openElements.high, 0):
          if parser.openElements[j] == formatting:
            break
          if parser.openElements[j].tagType in SpecialElements:
            furthestBlock = parser.openElements[j]
            furthestBlockIndex = j
            break
        if furthestBlock == nil:
          while parser.popElement() != formatting: discard
          parser.activeFormatting.delete(formattingIndex)
          return false
        let commonAncestor = parser.openElements[stackIndex - 1]
        var bookmark = formattingIndex
        var node = furthestBlock
        var aboveNode = parser.openElements[furthestBlockIndex - 1]
        var lastNode = furthestBlock
        var j = 0
        while true:
          inc j
          node = aboveNode
          let nodeStackIndex = parser.openElements.find(node)
          if node == formatting: break
          var nodeFormattingIndex = -1
          for i in countdown(parser.activeFormatting.high, 0):
            if parser.activeFormatting[i][0] == node:
              nodeFormattingIndex = i
              break
          if j > 3 and nodeFormattingIndex >= 0:
            parser.activeFormatting.delete(nodeFormattingIndex)
            if nodeFormattingIndex < bookmark:
              dec bookmark # a previous node got deleted, so decrease bookmark by one
          if nodeFormattingIndex < 0:
            aboveNode = parser.openElements[nodeStackIndex - 1]
            parser.openElements.delete(nodeStackIndex)
            if nodeStackIndex < furthestBlockIndex:
              dec furthestBlockIndex
              furthestBlock = parser.openElements[furthestBlockIndex]
            continue
          let element = parser.createElement(parser.activeFormatting[nodeFormattingIndex][1], Namespace.HTML, commonAncestor)
          parser.activeFormatting[nodeFormattingIndex] = (element, parser.activeFormatting[nodeFormattingIndex][1])
          parser.openElements[nodeStackIndex] = element
          aboveNode = parser.openElements[nodeStackIndex - 1]
          node = element
          if lastNode == furthestBlock:
            bookmark = nodeFormattingIndex + 1
          node.append(lastNode)
          lastNode = node
        let location = parser.appropriatePlaceForInsert(commonAncestor)
        location.inside.insert(lastNode, location.before)
        let token = parser.activeFormatting[formattingIndex][1]
        let element = parser.createElement(token, Namespace.HTML, furthestBlock)
        var tomove: seq[Node]
        for j in countdown(furthestBlock.childNodes.high, 0):
          let child = furthestBlock.childNodes[j]
          child.remove()
          tomove.add(child)
        for child in tomove:
          element.append(child)
        furthestBlock.append(element)
        parser.activeFormatting.insert((element, token), bookmark)
        parser.activeFormatting.delete(formattingIndex)
        parser.openElements.insert(element, furthestBlockIndex)
        parser.openElements.delete(stackIndex)

    template any_other_start_tag() =
      parser.reconstructActiveFormatting()
      discard parser.insertHTMLElement(token)

    template any_other_end_tag() =
      for i in countdown(parser.openElements.high, 0):
        let node = parser.openElements[i]
        if node.tagType != TAG_UNKNOWN and node.tagType == token.tagtype or node.localName == token.tagname:
          parser.generateImpliedEndTags(token.tagtype)
          if node != parser.currentNode: parse_error
          while parser.popElement() != node: discard
          break
        elif node.tagType in SpecialElements:
          parse_error
          return
    
    match token:
      '\0' => (block: parse_error)
      AsciiWhitespace => (block:
        parser.reconstructActiveFormatting()
        parser.insertCharacter(token.c)
      )
      TokenType.CHARACTER_ASCII => (block:
        parser.reconstructActiveFormatting()
        parser.insertCharacter(token.c)
        parser.framesetOk = false
      )
      TokenType.CHARACTER => (block:
        parser.reconstructActiveFormatting()
        parser.insertCharacter(token.r)
        parser.framesetOk = false
      )
      TokenType.COMMENT => (block: parser.insertComment(token))
      TokenType.DOCTYPE => (block: parse_error)
      "<html>" => (block:
        parse_error
        if parser.openElements.hasElement(TAG_TEMPLATE):
          discard
        else:
          for k, v in token.attrs:
            if k notin parser.openElements[0].attributes:
              parser.openElements[0].attributes[k] = v
      )
      ("<base>", "<basefont>", "<bgsound>", "<link>", "<meta>", "<noframes>", "<script>", "<style>", "<template>", "<title>",
       "</template>") => (block: parser.processInHTMLContent(token, IN_HEAD))
      "<body>" => (block:
        parse_error
        if parser.openElements.len == 1 or parser.openElements[1].tagType != TAG_BODY or parser.openElements.hasElement(TAG_TEMPLATE):
          discard
        else:
          parser.framesetOk = false
          for k, v in token.attrs:
            if k notin parser.openElements[1].attributes:
              parser.openElements[1].attributes[k] = v
      )
      "<frameset>" => (block:
        parse_error
        if parser.openElements.len == 1 or parser.openElements[1].tagType != TAG_BODY or not parser.framesetOk:
          discard
        else:
          if parser.openElements[1].parentNode != nil:
            parser.openElements[1].remove()
            pop_all_nodes
      )
      TokenType.EOF => (block:
        if parser.templateModes.len > 0:
          parser.processInHTMLContent(token, IN_TEMPLATE)
        else:
          #NOTE parse error omitted
          discard # stop
      )
      "</body>" => (block:
        if not parser.openElements.hasElementInScope(TAG_BODY):
          parse_error
        else:
          #NOTE parse error omitted
          parser.insertionMode = AFTER_BODY
      )
      "</html>" => (block:
        if not parser.openElements.hasElementInScope(TAG_BODY):
          parse_error
        else:
          #NOTE parse error omitted
          parser.insertionMode = AFTER_BODY
          reprocess token
      )
      ("<address>", "<article>", "<aside>", "<blockquote>", "<center>",
      "<details>", "<dialog>", "<dir>", "<div>", "<dl>", "<fieldset>",
      "<figcaption>", "<figure>", "<footer>", "<header>", "<hgroup>", "<main>",
      "<menu>", "<nav>", "<ol>", "<p>", "<section>", "<summary>", "<ul>") => (block:
        if parser.openElements.hasElementInButtonScope(TAG_P):
          parser.closeP()
        discard parser.insertHTMLElement(token)
      )
      ("<h1>", "<h2>", "<h3>", "<h4>", "<h5>", "<h6>") => (block:
        if parser.openElements.hasElementInButtonScope(TAG_P):
          parser.closeP()
        if parser.currentNode.tagType in HTagTypes:
          parse_error
          pop_current_node
        discard parser.insertHTMLElement(token)
      )
      ("<pre>", "<listing>") => (block:
        if parser.openElements.hasElementInButtonScope(TAG_P):
          parser.closeP()
        discard parser.insertHTMLElement(token)
        parser.ignoreLF = true
        parser.framesetOk = false
      )
      "<form>" => (block:
        let hasTemplate = parser.openElements.hasElement(TAG_TEMPLATE)
        if parser.form != nil and not hasTemplate:
          parse_error
        else:
          if parser.openElements.hasElementInButtonScope(TAG_P):
            parser.closeP()
          let element = parser.insertHTMLElement(token)
          if not hasTemplate:
            parser.form = HTMLFormElement(element)
      )
      "<li>" => (block:
        parser.framesetOk = false
        for i in countdown(parser.openElements.high, 0):
          let node = parser.openElements[i]
          case node.tagType
          of TAG_LI:
            parser.generateImpliedEndTags(TAG_LI)
            if parser.currentNode.tagType != TAG_LI: parse_error
            while parser.popElement().tagType != TAG_LI: discard
            break
          of SpecialElements - {TAG_ADDRESS, TAG_DIV, TAG_P, TAG_LI}:
            break
          else: discard
        if parser.openElements.hasElementInButtonScope(TAG_P):
          parser.closeP()
        discard parser.insertHTMLElement(token)
      )
      ("<dd>", "<dt>") => (block:
        parser.framesetOk = false
        for i in countdown(parser.openElements.high, 0):
          let node = parser.openElements[i]
          case node.tagType
          of TAG_DD:
            parser.generateImpliedEndTags(TAG_DD)
            if parser.currentNode.tagType != TAG_DD: parse_error
            while parser.popElement().tagType != TAG_DD: discard
            break
          of TAG_DT:
            parser.generateImpliedEndTags(TAG_DT)
            if parser.currentNode.tagType != TAG_DT: parse_error
            while parser.popElement().tagType != TAG_DT: discard
            break
          of SpecialElements - {TAG_ADDRESS, TAG_DIV, TAG_P, TAG_DD, TAG_DT}:
            break
          else: discard
        if parser.openElements.hasElementInButtonScope(TAG_P):
          parser.closeP()
        discard parser.insertHTMLElement(token)
      )
      "<plaintext>" => (block:
        if parser.openElements.hasElementInButtonScope(TAG_P):
          parser.closeP()
        discard parser.insertHTMLElement(token)
        parser.tokenizer.state = PLAINTEXT
      )
      "<button>" => (block:
        if parser.openElements.hasElementInScope(TAG_BUTTON):
          parse_error
          parser.generateImpliedEndTags()
          while parser.popElement().tagType != TAG_BUTTON: discard
        parser.reconstructActiveFormatting()
        discard parser.insertHTMLElement(token)
        parser.framesetOk = false
      )
      ("</address>", "</article>", "</aside>", "</blockquote>", "</button>",
       "</center>", "</details>", "</dialog>", "</dir>", "</div>", "</dl>",
       "</fieldset>", "</figcaption>", "</figure>", "</footer>", "</header>",
       "</hgroup>", "</listing>", "</main>", "</menu>", "</nav>", "</ol>",
       "</pre>", "</section>", "</summary>", "</ul>") => (block:
        if not parser.openElements.hasElementInScope(token.tagtype):
          parse_error
        else:
          parser.generateImpliedEndTags()
          if parser.currentNode.tagType != token.tagtype: parse_error
          while parser.popElement().tagType != token.tagtype: discard
      )
      "</form>" => (block:
        if not parser.openElements.hasElement(TAG_TEMPLATE):
          let node = parser.form
          parser.form = nil
          if node == nil or not parser.openElements.hasElementInScope(node.tagType):
            parse_error
            return
          parser.generateImpliedEndTags()
          if parser.currentNode != node: parse_error
          parser.openElements.delete(parser.openElements.find(node))
        else:
          if not parser.openElements.hasElementInScope(TAG_FORM):
            parse_error
            return
          parser.generateImpliedEndTags()
          if parser.currentNode.tagType != TAG_FORM: parse_error
          while parser.popElement().tagType != TAG_FORM: discard
      )
      "</p>" => (block:
        if not parser.openElements.hasElementInButtonScope(TAG_P):
          parse_error
          discard parser.insertHTMLElement(Token(t: START_TAG, tagtype: TAG_P))
        parser.closeP()
      )
      "</li>" => (block:
        if not parser.openElements.hasElementInListItemScope(TAG_LI):
          parse_error
        else:
          parser.generateImpliedEndTags(TAG_LI)
          if parser.currentNode.tagType != TAG_LI: parse_error
          while parser.popElement().tagType != TAG_LI: discard
      )
      ("</dd>", "</dt>") => (block:
        if not parser.openElements.hasElementInScope(token.tagtype):
          parse_error
        else:
          parser.generateImpliedEndTags(token.tagtype)
          if parser.currentNode.tagType != token.tagtype: parse_error
          while parser.popElement().tagType != token.tagtype: discard
      )
      ("</h1>", "</h2>", "</h3>", "</h4>", "</h5>", "</h6>") => (block:
        if not parser.openElements.hasElementInScope(HTagTypes):
          parse_error
        else:
          parser.generateImpliedEndTags()
          if parser.currentNode.tagType != token.tagtype: parse_error
          while parser.popElement().tagType notin HTagTypes: discard
      )
      "</sarcasm>" => (block:
        #*deep breath*
        anything_else
      )
      "<a>" => (block:
        var anchor: Element = nil
        for i in countdown(parser.activeFormatting.high, 0):
          let format = parser.activeFormatting[i]
          if format[0] == nil:
            break
          if format[0].tagType == TAG_A:
            anchor = format[0]
            break
        if anchor != nil:
          parse_error
          if parser.adoptionAgencyAlgorithm(token):
            any_other_end_tag
            return
          for i in 0..parser.activeFormatting.high:
            if parser.activeFormatting[i][0] == anchor:
              parser.activeFormatting.delete(i)
              break
          for i in 0..parser.openElements.high:
            if parser.openElements[i] == anchor:
              parser.openElements.delete(i)
              break
        parser.reconstructActiveFormatting()
        let element = parser.insertHTMLElement(token)
        parser.pushOntoActiveFormatting(element, token)
      )
      ("<b>", "<big>", "<code>", "<em>", "<font>", "<i>", "<s>", "<small>",
       "<strike>", "<strong>", "<tt>", "<u>") => (block:
        parser.reconstructActiveFormatting()
        let element = parser.insertHTMLElement(token)
        parser.pushOntoActiveFormatting(element, token)
      )
      "<nobr>" => (block:
        parser.reconstructActiveFormatting()
        if parser.openElements.hasElementInScope(TAG_NOBR):
          parse_error
          if parser.adoptionAgencyAlgorithm(token):
            any_other_end_tag
            return
          parser.reconstructActiveFormatting()
        let element = parser.insertHTMLElement(token)
        parser.pushOntoActiveFormatting(element, token)
      )
      ("</a>", "</b>", "</big>", "</code>", "</em>", "</font>", "</i>",
       "</nobr>", "</s>", "</small>", "</strike>", "</strong>", "</tt>",
       "</u>") => (block:
        if parser.adoptionAgencyAlgorithm(token):
          any_other_end_tag
          return
      )
      ("<applet>", "<marquee>", "<object>") => (block:
        parser.reconstructActiveFormatting()
        discard parser.insertHTMLElement(token)
        parser.activeFormatting.add((nil, nil))
        parser.framesetOk = false
      )
      ("</applet>", "</marquee>", "</object>") => (block:
        if not parser.openElements.hasElementInScope(token.tagtype):
          parse_error
        else:
          parser.generateImpliedEndTags()
          if parser.currentNode.tagType != token.tagtype: parse_error
          while parser.popElement().tagType != token.tagtype: discard
          parser.clearActiveFormattingTillMarker()
      )
      "<table>" => (block:
        if parser.document.mode != QUIRKS:
          if parser.openElements.hasElementInButtonScope(TAG_P):
            parser.closeP()
        discard parser.insertHTMLElement(token)
        parser.framesetOk = false
        parser.insertionMode = IN_TABLE
      )
      "</br>" => (block:
        parse_error
        parser.processInHTMLContent(Token(t: START_TAG, tagtype: TAG_BR))
      )
      ("<area>", "<br>", "<embed>", "<img>", "<keygen>", "<wbr>") => (block:
        parser.reconstructActiveFormatting()
        discard parser.insertHTMLElement(token)
        pop_current_node
        parser.framesetOk = false
      )
      "<input>" => (block:
        parser.reconstructActiveFormatting()
        discard parser.insertHTMLElement(token)
        pop_current_node
        if not token.attrs.getOrDefault("type").equalsIgnoreCase("hidden"):
          parser.framesetOk = false
      )
      ("<param>", "<source>", "<track>") => (block:
        discard parser.insertHTMLElement(token)
        pop_current_node
      )
      "<hr>" => (block:
        if parser.openElements.hasElementInButtonScope(TAG_P):
          parser.closeP()
        discard parser.insertHTMLElement(token)
        pop_current_node
        parser.framesetOk = false
      )
      "<image>" => (block:
        #TODO ew
        let token = Token(t: START_TAG, tagtype: TAG_IMG, tagname: "img", selfclosing: token.selfclosing, attrs: token.attrs)
        reprocess token
      )
      "<textarea>" => (block:
        discard parser.insertHTMLElement(token)
        parser.ignoreLF = true
        parser.tokenizer.state = RCDATA
        parser.oldInsertionMode = parser.insertionMode
        parser.framesetOk = false
        parser.insertionMode = TEXT
      )
      "<xmp>" => (block:
        if parser.openElements.hasElementInButtonScope(TAG_P):
          parser.closeP()
        parser.reconstructActiveFormatting()
        parser.framesetOk = false
        parser.genericRawtextElementParsingAlgorithm(token)
      )
      "<iframe>" => (block:
        parser.framesetOk = false
        parser.genericRawtextElementParsingAlgorithm(token)
      )
      "<noembed>" => (block:
        parser.genericRawtextElementParsingAlgorithm(token)
      )
      "<noscript>" => (block:
        if parser.scripting:
          parser.genericRawtextElementParsingAlgorithm(token)
        else:
          any_other_start_tag
      )
      "<select>" => (block:
        parser.reconstructActiveFormatting()
        discard parser.insertHTMLElement(token)
        parser.framesetOk = false
        if parser.insertionMode in {IN_TABLE, IN_CAPTION, IN_TABLE_BODY, IN_CELL}:
          parser.insertionMode = IN_SELECT_IN_TABLE
        else:
          parser.insertionMode = IN_SELECT
      )
      ("<optgroup>", "<option>") => (block:
        if parser.currentNode.tagType == TAG_OPTION:
          pop_current_node
        parser.reconstructActiveFormatting()
        discard parser.insertHTMLElement(token)
      )
      ("<rb>", "<rtc>") => (block:
        if parser.openElements.hasElementInScope(TAG_RUBY):
          parser.generateImpliedEndTags()
          if parser.currentNode.tagType != TAG_RUBY: parse_error
        discard parser.insertHTMLElement(token)
      )
      ("<rp>", "<rt>") => (block:
        if parser.openElements.hasElementInScope(TAG_RUBY):
          parser.generateImpliedEndTags(TAG_RTC)
          if parser.currentNode.tagType notin {TAG_RUBY, TAG_RTC}: parse_error
        discard parser.insertHTMLElement(token)
      )
      #NOTE <math> (not implemented)
      #TODO <svg> (SVG)
      ("<caption>", "<col>", "<colgroup>", "<frame>", "<head>", "<tbody>",
       "<td>", "<tfoot>", "<th>", "<thead>", "<tr>") => (block: parse_error)
      TokenType.START_TAG => (block: any_other_start_tag)
      TokenType.END_TAG => (block: any_other_end_tag)

  of TEXT:
    match token:
      TokenType.CHARACTER_ASCII => (block:
        assert token.c != '\0'
        parser.insertCharacter(token.c)
      )
      TokenType.CHARACTER => (block:
        parser.insertCharacter(token.r)
      )
      TokenType.EOF => (block:
        parse_error
        if parser.currentNode.tagType == TAG_SCRIPT:
          HTMLScriptElement(parser.currentNode).alreadyStarted = true
        pop_current_node
        parser.insertionMode = parser.oldInsertionMode
        reprocess token
      )
      "</script>" => (block:
        #TODO microtask
        pop_current_node
        parser.insertionMode = parser.oldInsertionMode
        #TODO document.write() ?
        #TODO prepare script element
        #TODO uh implement scripting or something
      )
      TokenType.END_TAG => (block:
        pop_current_node
        parser.insertionMode = parser.oldInsertionMode
      )

  of IN_TABLE:
    template clear_the_stack_back_to_a_table_context() =
      while parser.currentNode.tagType notin {TAG_TABLE, TAG_TEMPLATE, TAG_HTML}:
        pop_current_node

    match token:
      (TokenType.CHARACTER_ASCII, TokenType.CHARACTER) => (block:
        if parser.currentNode.tagType in {TAG_TABLE, TAG_TBODY, TAG_TFOOT, TAG_THEAD, TAG_TR}:
          parser.pendingTableChars = ""
          parser.pendingTableCharsWhitespace = true
          parser.oldInsertionMode = parser.insertionMode
          parser.insertionMode = IN_TABLE_TEXT
          reprocess token
        else: # anything else
          parse_error
          parser.fosterParenting = true
          parser.processInHTMLContent(token, IN_BODY)
          parser.fosterParenting = false
      )
      TokenType.COMMENT => (block: parser.insertComment(token))
      TokenType.DOCTYPE => (block: parse_error)
      "<caption>" => (block: 
        clear_the_stack_back_to_a_table_context
        parser.activeFormatting.add((nil, nil))
        discard parser.insertHTMLElement(token)
        parser.insertionMode = IN_CAPTION
      )
      "<colgroup>" => (block:
        clear_the_stack_back_to_a_table_context
        discard parser.insertHTMLElement(Token(t: START_TAG, tagtype: TAG_COLGROUP))
        parser.insertionMode = IN_COLUMN_GROUP
      )
      ("<tbody>", "<tfoot>", "<thead>") => (block:
        clear_the_stack_back_to_a_table_context
        discard parser.insertHTMLElement(token)
        parser.insertionMode = IN_TABLE_BODY
      )
      ("<td>", "<th>", "<tr>") => (block:
        clear_the_stack_back_to_a_table_context
        discard parser.insertHTMLElement(Token(t: START_TAG, tagtype: TAG_TBODY))
        parser.insertionMode = IN_TABLE_BODY
      )
      "<table>" => (block:
        parse_error
        if not parser.openElements.hasElementInScope(TAG_TABLE):
          discard
        else:
          while parser.popElement().tagType != TAG_TABLE: discard
          parser.resetInsertionMode()
          reprocess token
      )
      "</table>" => (block:
        if not parser.openElements.hasElementInScope(TAG_TABLE):
          parse_error
        else:
          while parser.popElement().tagType != TAG_TABLE: discard
          parser.resetInsertionMode()
      )
      ("</body>", "</caption>", "</col>", "</colgroup>", "</html>", "</tbody>",
       "</td>", "</tfoot>", "</th>", "</thead>", "</tr>") => (block:
        parse_error
      )
      ("<style>", "<script>", "<template>", "</template>") => (block:
        parser.processInHTMLContent(token, IN_HEAD)
      )
      "<input>" => (block:
        if not token.attrs.getOrDefault("type").equalsIgnoreCase("hidden"):
          # anything else
          parse_error
          parser.fosterParenting = true
          parser.processInHTMLContent(token, IN_BODY)
          parser.fosterParenting = false
        else:
          parse_error
          discard parser.insertHTMLElement(token)
          pop_current_node
      )
      "<form>" => (block:
        parse_error
        if parser.form != nil or parser.openElements.hasElement(TAG_TEMPLATE):
          discard
        else:
          parser.form = HTMLFormElement(parser.insertHTMLElement(token))
          pop_current_node
      )
      TokenType.EOF => (block:
        parser.processInHTMLContent(token, IN_BODY)
      )
      _ => (block:
        parse_error
        parser.fosterParenting = true
        parser.processInHTMLContent(token, IN_BODY)
        parser.fosterParenting = false
      )

  of IN_TABLE_TEXT:
    match token:
      '\0' => (block: parse_error)
      TokenType.CHARACTER_ASCII => (block:
        if token.c notin AsciiWhitespace:
          parser.pendingTableCharsWhitespace = false
        parser.pendingTableChars &= token.c
      )
      TokenType.CHARACTER => (block:
        parser.pendingTableChars &= token.r
        parser.pendingTableCharsWhitespace = false
      )
      _ => (block:
        if not parser.pendingTableCharsWhitespace:
          # I *think* this is effectively the same thing the specification wants...
          parse_error
          parser.fosterParenting = true
          parser.reconstructActiveFormatting()
          parser.insertCharacter(parser.pendingTableChars)
          parser.framesetOk = false
          parser.fosterParenting = false
        else:
          parser.insertCharacter(parser.pendingTableChars)
        parser.insertionMode = parser.oldInsertionMode
        reprocess token
      )

  of IN_CAPTION:
    match token:
      "</caption>" => (block:
        if not parser.openElements.hasElementInTableScope(TAG_CAPTION):
          parse_error
        else:
          parser.generateImpliedEndTags()
          if parser.currentNode.tagType != TAG_CAPTION: parse_error
          while parser.popElement().tagType != TAG_CAPTION: discard
          parser.clearActiveFormattingTillMarker()
          parser.insertionMode = IN_TABLE
      )
      ("<caption>", "<col>", "<colgroup>", "<tbody>", "<td>", "<tfoot>",
       "<th>", "<thead>", "<tr>", "</table>") => (block:
        if not parser.openElements.hasElementInTableScope(TAG_CAPTION):
          parse_error
        else:
          parser.generateImpliedEndTags()
          if parser.currentNode.tagType != TAG_CAPTION: parse_error
          parser.clearActiveFormattingTillMarker()
          parser.insertionMode = IN_TABLE
          reprocess token
      )
      ("</body>", "</col>", "</colgroup>", "</html>", "</tbody>", "</td>",
       "</tfoot>", "</th>", "</thead>", "</tr>") => (block: parse_error)
      _ => (block: parser.processInHTMLContent(token, IN_BODY))

  of IN_COLUMN_GROUP:
    match token:
      AsciiWhitespace => (block: parser.insertCharacter(token.c))
      TokenType.COMMENT => (block: parser.insertComment(token))
      TokenType.DOCTYPE => (block: parse_error)
      "<html>" => (block: parser.processInHTMLContent(token, IN_BODY))
      "<col>" => (block:
        discard parser.insertHTMLElement(token)
        pop_current_node
      )
      "</colgroup>" => (block:
        if parser.currentNode.tagType != TAG_COLGROUP:
          parse_error
        else:
          pop_current_node
          parser.insertionMode = IN_TABLE
      )
      "</col>" => (block: parse_error)
      ("<template>", "</template>") => (block:
        parser.processInHTMLContent(token, IN_HEAD)
      )
      TokenType.EOF => (block: parser.processInHTMLContent(token, IN_BODY))
      _ => (block:
        if parser.currentNode.tagType != TAG_COLGROUP:
          parse_error
        else:
          pop_current_node
          parser.insertionMode = IN_TABLE
          reprocess token
      )

  of IN_TABLE_BODY:
    template clear_the_stack_back_to_a_table_body_context() =
      while parser.currentNode.tagType notin {TAG_TBODY, TAG_TFOOT, TAG_THEAD, TAG_TEMPLATE, TAG_HTML}:
        pop_current_node

    match token:
      "<tr>" => (block:
        clear_the_stack_back_to_a_table_body_context
        discard parser.insertHTMLElement(token)
        parser.insertionMode = IN_ROW
      )
      ("<th>", "<td>") => (block:
        parse_error
        clear_the_stack_back_to_a_table_body_context
        discard parser.insertHTMLElement(Token(t: START_TAG, tagtype: TAG_TR))
        parser.insertionMode = IN_ROW
        reprocess token
      )
      ("</tbody>", "</tfoot>", "</thead>") => (block:
        if not parser.openElements.hasElementInTableScope(token.tagtype):
          parse_error
        else:
          clear_the_stack_back_to_a_table_body_context
          pop_current_node
          parser.insertionMode = IN_TABLE
      )
      ("<caption>", "<col>", "<colgroup>", "<tbody>", "<tfoot>", "<thead>",
       "</table>") => (block:
        if not parser.openElements.hasElementInTableScope({TAG_TBODY, TAG_THEAD, TAG_TFOOT}):
          parse_error
        else:
          clear_the_stack_back_to_a_table_body_context
          pop_current_node
          parser.insertionMode = IN_TABLE
          reprocess token
      )
      ("</body>", "</caption>", "</col>", "</colgroup>", "</html>", "</td>",
       "</th>", "</tr>") => (block:
        parse_error
      )
      _ => (block: parser.processInHTMLContent(token, IN_TABLE))

  of IN_ROW:
    template clear_the_stack_back_to_a_table_row_context() =
      while parser.currentNode.tagType notin {TAG_TR, TAG_TEMPLATE, TAG_HTML}:
        pop_current_node

    match token:
      ("<th>", "<td>") => (block:
        clear_the_stack_back_to_a_table_row_context
        discard parser.insertHTMLElement(token)
        parser.insertionMode = IN_CELL
        parser.activeFormatting.add((nil, nil))
      )
      "</tr>" => (block:
        if not parser.openElements.hasElementInTableScope(TAG_TR):
          parse_error
        else:
          clear_the_stack_back_to_a_table_row_context
          pop_current_node
          parser.insertionMode = IN_TABLE_BODY
      )
      ("<caption>", "<col>", "<colgroup>", "<tbody>", "<tfoot>", "<thead>",
       "<tr>", "</table>") => (block:
        if not parser.openElements.hasElementInTableScope(TAG_TR):
          parse_error
        else:
          clear_the_stack_back_to_a_table_row_context
          pop_current_node
          parser.insertionMode = IN_TABLE_BODY
          reprocess token
      )
      ("</tbody>", "</tfoot>", "</thead>") => (block:
        if not parser.openElements.hasElementInTableScope(token.tagtype):
          parse_error
        elif not parser.openElements.hasElementInTableScope(TAG_TR):
          discard
        else:
          clear_the_stack_back_to_a_table_row_context
          pop_current_node
          parser.insertionMode = IN_BODY
          reprocess token
      )
      ("</body>", "</caption>", "</col>", "</colgroup>", "</html>", "</td>",
       "</th>") => (block: parse_error)
      _ => (block: parser.processInHTMLContent(token, IN_TABLE))

  of IN_CELL:
    template close_cell() =
      parser.generateImpliedEndTags()
      if parser.currentNode.tagType notin {TAG_TD, TAG_TH}: parse_error
      while parser.popElement().tagType notin {TAG_TD, TAG_TH}: discard
      parser.clearActiveFormattingTillMarker()
      parser.insertionMode = IN_ROW

    match token:
      ("</td>", "</th>") => (block:
        if not parser.openElements.hasElementInTableScope(token.tagtype):
          parse_error
        else:
          parser.generateImpliedEndTags()
          if parser.currentNode.tagType != token.tagtype: parse_error
          while parser.popElement().tagType != token.tagtype: discard
          parser.clearActiveFormattingTillMarker()
          parser.insertionMode = IN_ROW
      )
      ("<caption>", "<col>", "<colgroup>", "<tbody>", "<td>", "<tfoot>",
       "<thead>", "<tr>") => (block:
        if not parser.openElements.hasElementInTableScope({TAG_TD, TAG_TH}):
          parse_error
        else:
          close_cell
      )
      ("</body>", "</caption>", "</col>", "</colgroup>",
       "</html>") => (block: parse_error)
      ("</table>", "</tbody>", "</tfoot>", "</thead>", "</tr>") => (block:
        if not parser.openElements.hasElementInTableScope(token.tagtype):
          parse_error
        else:
          close_cell
          reprocess token
      )
      _ => (block: parser.processInHTMLContent(token, IN_BODY))

  of IN_SELECT:
    match token:
      '\0' => (block: parse_error)
      TokenType.CHARACTER_ASCII => (block: parser.insertCharacter(token.c))
      TokenType.CHARACTER => (block: parser.insertCharacter(token.r))
      TokenType.DOCTYPE => (block: parse_error)
      "<html>" => (block: parser.processInHTMLContent(token, IN_BODY))
      "<option>" => (block:
        if parser.currentNode.tagType == TAG_OPTION:
          pop_current_node
        discard parser.insertHTMLElement(token)
      )
      "<optgroup>" => (block:
        if parser.currentNode.tagType == TAG_OPTION:
          pop_current_node
        if parser.currentNode.tagType == TAG_OPTGROUP:
          pop_current_node
        discard parser.insertHTMLElement(token)
      )
      "</optgroup>" => (block:
        if parser.currentNode.tagType == TAG_OPTION:
          if parser.openElements.len > 1 and parser.openElements[^2].tagType == TAG_OPTGROUP:
            pop_current_node
        if parser.currentNode.tagType == TAG_OPTGROUP:
          pop_current_node
        else:
          parse_error
      )
      "</option>" => (block:
        if parser.currentNode.tagType == TAG_OPTION:
          pop_current_node
        else:
          parse_error
      )
      "</select>" => (block:
        if not parser.openElements.hasElementInSelectScope(TAG_SELECT):
          parse_error
        else:
          while parser.popElement().tagType != TAG_SELECT: discard
          parser.resetInsertionMode()
      )
      "<select>" => (block:
        parse_error
        if parser.openElements.hasElementInSelectScope(TAG_SELECT):
          while parser.popElement().tagType != TAG_SELECT: discard
          parser.resetInsertionMode()
      )
      ("<input>", "<keygen>", "<textarea>") => (block:
        parse_error
        if not parser.openElements.hasElementInSelectScope(TAG_SELECT):
          discard
        else:
          while parser.popElement().tagType != TAG_SELECT: discard
          parser.resetInsertionMode()
          reprocess token
      )
      ("<script>", "<template>", "</template>") => (block: parser.processInHTMLContent(token, IN_HEAD))
      TokenType.EOF => (block: parser.processInHTMLContent(token, IN_BODY))
      _ => (block: parse_error)

  of IN_SELECT_IN_TABLE:
    match token:
      ("<caption>", "<table>", "<tbody>", "<tfoot>", "<thead>", "<tr>", "<td>",
       "<th>") => (block:
        parse_error
        while parser.popElement().tagType != TAG_SELECT: discard
        parser.resetInsertionMode()
        reprocess token
      )
      ("</caption>", "</table>", "</tbody>", "</tfoot>", "</thead>", "</tr>",
       "</td>", "</th>") => (block:
        parse_error
        if not parser.openElements.hasElementInTableScope(token.tagtype):
          discard
        else:
          while parser.popElement().tagType != TAG_SELECT: discard
          parser.resetInsertionMode()
          reprocess token
      )
      _ => (block: parser.processInHTMLContent(token, IN_SELECT))

  of IN_TEMPLATE:
    match token:
      (TokenType.CHARACTER_ASCII, TokenType.CHARACTER, TokenType.DOCTYPE) => (block:
        parser.processInHTMLContent(token, IN_BODY)
      )
      ("<base>", "<basefont>", "<bgsound>", "<link>", "<meta>", "<noframes>",
       "<script>", "<style>", "<template>", "<title>", "</template>") => (block:
        parser.processInHTMLContent(token, IN_HEAD)
      )
      ("<caption>", "<colgroup>", "<tbody>", "<tfoot>", "<thead>") => (block:
        discard parser.templateModes.pop()
        parser.templateModes.add(IN_TABLE)
        parser.insertionMode = IN_TABLE
        reprocess token
      )
      "<col>" => (block:
        discard parser.templateModes.pop()
        parser.templateModes.add(IN_COLUMN_GROUP)
        parser.insertionMode = IN_COLUMN_GROUP
        reprocess token
      )
      "<tr>" => (block:
        discard parser.templateModes.pop()
        parser.templateModes.add(IN_TABLE_BODY)
        parser.insertionMode = IN_TABLE_BODY
        reprocess token
      )
      ("<td>", "<th>") => (block:
        discard parser.templateModes.pop()
        parser.templateModes.add(IN_ROW)
        parser.insertionMode = IN_ROW
        reprocess token
      )
      TokenType.START_TAG => (block:
        discard parser.templateModes.pop()
        parser.templateModes.add(IN_BODY)
        parser.insertionMode = IN_BODY
        reprocess token
      )
      TokenType.END_TAG => (block: parse_error)
      TokenType.EOF => (block:
        if not parser.openElements.hasElement(TAG_TEMPLATE):
          discard # stop
        else:
          parse_error
          while parser.popElement().tagType != TAG_TEMPLATE: discard
          parser.clearActiveFormattingTillMarker()
          discard parser.templateModes.pop()
          parser.resetInsertionMode()
          reprocess token
      )

  of AFTER_BODY:
    match token:
      AsciiWhitespace => (block: parser.processInHTMLContent(token, IN_BODY))
      TokenType.COMMENT => (block: parser.insertComment(token, last_child_of(parser.openElements[0])))
      TokenType.DOCTYPE => (block: parse_error)
      "<html>" => (block: parser.processInHTMLContent(token, IN_BODY))
      "</html>" => (block:
        if parser.fragment:
          parse_error
        else:
          parser.insertionMode = AFTER_AFTER_BODY
      )
      TokenType.EOF => (block: discard) # stop
      _ => (block:
        parse_error
        parser.insertionMode = IN_BODY
        reprocess token
      )

  of IN_FRAMESET:
    match token:
      AsciiWhitespace => (block: parser.insertCharacter(token.c))
      TokenType.COMMENT => (block: parser.insertComment(token))
      TokenType.DOCTYPE => (block: parse_error)
      "<html>" => (block: parser.processInHTMLContent(token, IN_BODY))
      "<frameset>" => (block:
        if parser.currentNode == parser.document.html:
          parse_error
        else:
          pop_current_node
        if not parser.fragment and parser.currentNode.tagType != TAG_FRAMESET:
          parser.insertionMode = AFTER_FRAMESET
      )
      "<frame>" => (block:
        discard parser.insertHTMLElement(token)
        pop_current_node
      )
      "<noframes>" => (block: parser.processInHTMLContent(token, IN_HEAD))
      TokenType.EOF => (block:
        if parser.currentNode != parser.document.html: parse_error
        # stop
      )
      _ => (block: parse_error)

  of AFTER_FRAMESET:
    match token:
      AsciiWhitespace => (block: parser.insertCharacter(token.c))
      TokenType.COMMENT => (block: parser.insertComment(token))
      TokenType.DOCTYPE => (block: parse_error)
      "<html>" => (block: parser.processInHTMLContent(token, IN_BODY))
      "</html>" => (block: parser.insertionMode = AFTER_AFTER_FRAMESET)
      "<noframes>" => (block: parser.processInHTMLContent(token, IN_HEAD))
      TokenType.EOF => (block: discard) # stop
      _ => (block: parse_error)

  of AFTER_AFTER_BODY:
    match token:
      TokenType.COMMENT => (block: parser.insertComment(token, last_child_of(parser.document)))
      (TokenType.DOCTYPE, AsciiWhitespace, "<html>") => (block: parser.processInHTMLContent(token, IN_BODY))
      TokenType.EOF => (block: discard) # stop
      _ => (block:
        parse_error
        parser.insertionMode = IN_BODY
        reprocess token
      )

  of AFTER_AFTER_FRAMESET:
    match token:
      TokenType.COMMENT => (block: parser.insertComment(token, last_child_of(parser.document)))
      (TokenType.DOCTYPE, AsciiWhitespace, "<html>") => (block: parser.processInHTMLContent(token, IN_BODY))
      TokenType.EOF => (block: discard) # stop
      "<noframes>" => (block: parser.processInHTMLContent(token, IN_HEAD))
      _ => (block: parse_error)

proc processInForeignContent(parser: var HTML5Parser, token: Token) =
  ## Processes `token` according to the tree construction rules for foreign
  ## content. Only the SVG namespace is handled here; MathML is not
  ## implemented.
  ##
  ## May pop entries off `parser.openElements`, insert characters, comments
  ## or foreign elements, and hand the token over to `processInHTMLContent`.

  # Local `=>` operator: expands to a discarded tuple of the pattern and a
  # closure over the body.
  # NOTE(review): presumably only here so that the `match` arms below can be
  # parsed/type-checked -- confirm against the `match` macro.
  macro `=>`(v: typed, body: untyped): untyped =
    quote do:
      discard (`v`, proc() = `body`)

  # Steps for an SVG "script" end tag; for now this only pops the element.
  template script_end_tag() =
    pop_current_node
    #TODO document.write (?)
    #TODO SVG

  # Steps for "any other end tag" in foreign content.
  template any_other_end_tag() =
    # Token tag names are compared against the ASCII-lowercased element name,
    # so mixed-case SVG names (e.g. foreignObject) compare case-insensitively.
    if not parser.currentNode.localName.equalsIgnoreCase(token.tagname):
      parse_error
    # Walk down the stack of open elements, never touching the topmost entry.
    for i in countdown(parser.openElements.high, 1):
      let node = parser.openElements[i]
      if node.localName.equalsIgnoreCase(token.tagname):
        # Pop elements until (and including) the matching node.
        while parser.popElement() != node: discard
        break
      # Once the previous stack entry is an HTML element, the token is
      # processed exactly once by the HTML content rules and the walk stops;
      # foreign entries merely continue the walk.
      if parser.openElements[i - 1].namespace == Namespace.HTML:
        parser.processInHTMLContent(token)
        break

  # Maps lowercased SVG tag names to their mixed-case form.
  const CaseTable = {
    "altglyph": "altGlyph",
    "altglyphdef": "altGlyphDef",
    "altglyphitem": "altGlyphItem",
    "animatecolor": "animateColor",
    "animatemotion": "animateMotion",
    "animatetransform": "animateTransform",
    "clippath": "clipPath",
    "feblend": "feBlend",
    "fecolormatrix": "feColorMatrix",
    "fecomponenttransfer": "feComponentTransfer",
    "fecomposite": "feComposite",
    "feconvolvematrix": "feConvolveMatrix",
    "fediffuselighting": "feDiffuseLighting",
    "fedisplacementmap": "feDisplacementMap",
    "fedistantlight": "feDistantLight",
    "fedropshadow": "feDropShadow",
    "feflood": "feFlood",
    "fefunca": "feFuncA",
    "fefuncb": "feFuncB",
    "fefuncg": "feFuncG",
    "fefuncr": "feFuncR",
    "fegaussianblur": "feGaussianBlur",
    "feimage": "feImage",
    "femerge": "feMerge",
    "femergenode": "feMergeNode",
    "femorphology": "feMorphology",
    "feoffset": "feOffset",
    "fepointlight": "fePointLight",
    "fespecularlighting": "feSpecularLighting",
    "fespotlight": "feSpotLight",
    "fetile": "feTile",
    "feturbulence": "feTurbulence",
    "foreignobject": "foreignObject",
    "glyphref": "glyphRef",
    "lineargradient": "linearGradient",
    "radialgradient": "radialGradient",
    "textpath": "textPath",
  }.toTable()

  match token:
    '\0' => (block:
      parse_error
      parser.insertCharacter(Rune(0xFFFD))
    )
    AsciiWhitespace => (block: parser.insertCharacter(token.c))
    TokenType.CHARACTER_ASCII => (block:
      parser.insertCharacter(token.c)
      # Any non-whitespace character makes a later frameset invalid.
      parser.framesetOk = false
    )
    TokenType.CHARACTER => (block:
      parser.insertCharacter(token.r)
      parser.framesetOk = false
    )
    TokenType.COMMENT => (block: parser.insertComment(token))
    TokenType.DOCTYPE => (block: parse_error)
    ("<b>", "<big>", "<blockquote>", "<body>", "<br>", "<center>", "<code>",
     "<dd>", "<div>", "<dl>", "<dt>", "<em>", "<embed>", "<h1>", "<h2>", "<h3>",
     "<h4>", "<h5>", "<h6>", "<head>", "<hr>", "<i>", "<img>", "<li>",
     "<listing>", "<menu>", "<meta>", "<nobr>", "<ol>", "<p>", "<pre>",
     "<ruby>", "<s>", "<small>", "<span>", "<strong>", "<strike>", "<sub>",
     "<sup>", "<table>", "<tt>", "<u>", "<ul>", "<var>") => (block:
      parse_error
      #NOTE MathML not implemented
      # Leave foreign content: pop until an HTML integration point or an HTML
      # element is the current node, then handle the tag as HTML.
      while not (parser.currentNode.isHTMLIntegrationPoint() or parser.currentNode.inHTMLNamespace()):
        pop_current_node
      parser.processInHTMLContent(token)
    )
    TokenType.START_TAG => (block:
      #NOTE MathML not implemented

      if parser.adjustedCurrentNode.namespace == Namespace.SVG:
        if token.tagname in CaseTable:
          token.tagname = CaseTable[token.tagname]
        adjustSVGAttributes(token)
      #TODO adjust foreign attributes
      let element = parser.insertForeignElement(token, parser.adjustedCurrentNode.namespace)
      # Only self-closing tags are closed right away; any other element stays
      # on the stack of open elements so that its children nest inside it.
      if token.selfclosing:
        if token.tagname == "script" and element.inSVGNamespace():
          script_end_tag
        else:
          pop_current_node
    )
    "</script>" => (block:
      if parser.currentNode.namespace == Namespace.SVG and parser.currentNode.localName == "script": #TODO SVG
        script_end_tag
      else:
        any_other_end_tag
    )
    TokenType.END_TAG => (block: any_other_end_tag)

proc constructTree(parser: var HTML5Parser): Document =
  ## Runs every token yielded by the parser's tokenizer through either the
  ## HTML content rules or the foreign content rules, then returns the
  ## parser's document.
  for token in parser.tokenizer.tokenize:
    # A pending ignoreLF request only concerns the very next token: the flag
    # is cleared here and the token is dropped if it is a line feed.
    let skipLineFeed = parser.ignoreLF and token.t == CHARACTER_ASCII and
      token.c == '\n'
    parser.ignoreLF = false
    if skipLineFeed:
      continue

    #NOTE MathML not implemented
    let useHTMLRules =
      parser.openElements.len == 0 or
      parser.adjustedCurrentNode.inHTMLNamespace() or
      (parser.adjustedCurrentNode.isHTMLIntegrationPoint() and
        token.t in {START_TAG, CHARACTER, CHARACTER_ASCII}) or
      token.t == EOF
    if useHTMLRules:
      parser.processInHTMLContent(token)
    else:
      parser.processInForeignContent(token)

  #TODO document.write (?)
  #TODO etc etc...

  result = parser.document

proc parseHTML5*(inputStream: Stream): Document =
  ## Builds a document from `inputStream`: sets up a parser with a new
  ## document and a tokenizer created from the stream, then returns the
  ## result of tree construction.
  var htmlParser: HTML5Parser
  htmlParser.document = newDocument()
  htmlParser.tokenizer = newTokenizer(inputStream)
  result = constructTree(htmlParser)

proc newDOMParser*(): DOMParser {.jsctor.} =
  ## Constructor for `DOMParser`; returns a freshly allocated instance.
  result = DOMParser()

proc parseFromString*(parser: DOMParser, str: string, t: string): Document {.jserr, jsfunc.} =
  ## Parses `str` as a document of MIME type `t`.
  ##
  ## Only "text/html" is parsed; the XML-based types throw a JS internal
  ## error, and any other value throws a JS type error.
  const UnsupportedXMLTypes = [
    "text/xml", "application/xml", "application/xhtml+xml", "image/svg+xml"
  ]
  if t == "text/html":
    return parseHTML5(newStringStream(str))
  elif t in UnsupportedXMLTypes:
    JS_THROW JS_InternalError, "XML parsing is not supported yet"
  else:
    JS_THROW JS_TypeError, "Invalid mime type"

proc addHTMLModule*(ctx: JSContext) =
  ## Registers the `DOMParser` type with the JS context `ctx`.
  ctx.registerType(DOMParser)