about summary refs log blame commit diff stats
path: root/src/css/selectorparser.nim
blob: 57532ca3d0b86bdfe97ac43c2f2cbc18e7b5bbc2 (plain) (tree)
1
2
3
4
5
6
7
8
9
10
              
              
               

              
                    
                


                      

                                                                        
 
                    

                                             
                                                                         
 

                                                                           
                                                                         

                                                                   
 
                        

                                                           
 
                         
                                   


                                 
 
                                          


                         

                             










                                            
                         
                       
                       
 



                                               
                           
                                           
                          

                   

                 




                                                                          

                                      
 















                                                                            
               
                                 
                                                       
 
                                         
 
                                  


                           

                           


                       







                                                 




                          






                                           
                        


                                                                 
                                    
                        
                                      
                         
                                           


                         

                                  


                                           
                  
 


















                                                 
 



                                         
                                   
                  


                             
                  
                                    



                                     
                                               
                                    
                    
                                       






                                       
                                                           
               


                        
                                                   
                  

                                 






                                                   

                    














































































                                                                                                                            
                                                     





























































                                                                                                         
                        

                              
                         
                    

                                        
                              


                                                                   






                                                           
                         


















                                                                                           
                                

                                                                    
         










                                                                       





















                                                                     
 
                                                                                                   
                                 


                                                            
import options
import streams
import strutils
import unicode

import css/cssparser
import html/tags

type
  # Kind tag of a simple selector; selects the active branch of `Selector`.
  SelectorType* = enum
    TYPE_SELECTOR, UNKNOWN_TYPE_SELECTOR, ID_SELECTOR, ATTR_SELECTOR,
    CLASS_SELECTOR, UNIVERSAL_SELECTOR, PSEUDO_SELECTOR, PSELEM_SELECTOR

  # Pseudo-elements. PSEUDO_NONE is what `pseudo` reports for a complex
  # selector that does not end in a pseudo-element.
  PseudoElem* = enum
    PSEUDO_NONE, PSEUDO_BEFORE, PSEUDO_AFTER,
    # internal (the parser in this file never produces these)
    PSEUDO_INPUT_TEXT, PSEUDO_TEXTAREA_TEXT, PSEUDO_IMAGE, PSEUDO_NEWLINE

  # Pseudo-classes; selects the active branch of `PseudoData`.
  PseudoClass* = enum
    PSEUDO_FIRST_CHILD, PSEUDO_LAST_CHILD, PSEUDO_ONLY_CHILD, PSEUDO_HOVER,
    PSEUDO_ROOT, PSEUDO_NTH_CHILD, PSEUDO_NTH_LAST_CHILD, PSEUDO_CHECKED,
    PSEUDO_FOCUS, PSEUDO_IS, PSEUDO_NOT, PSEUDO_WHERE, PSEUDO_LANG,
    PSEUDO_LINK, PSEUDO_VISITED

  # Combinator linking a compound selector to the one that follows it.
  # NO_COMBINATOR marks the last compound selector of a complex selector.
  CombinatorType* = enum
    NO_COMBINATOR, DESCENDANT_COMBINATOR, CHILD_COMBINATOR,
    NEXT_SIBLING_COMBINATOR, SUBSEQ_SIBLING_COMBINATOR

  # Cursor over a list of component values plus a sticky failure flag.
  SelectorParser = object
    selectors: seq[ComplexSelector] # not referenced by the procs in this file
    cvals: seq[CSSComponentValue] # input being parsed
    at: int # index of the next component value to consume
    failed: bool # set by the `fail` template; makes `has` return false

  Selector* = ref object # Simple selector
    case t*: SelectorType
    of TYPE_SELECTOR:
      tag*: TagType # known tag, as resolved by tagType()
    of UNKNOWN_TYPE_SELECTOR:
      tagstr*: string # lowercased tag name that tagType() did not recognize
    of ID_SELECTOR:
      id*: string
    of ATTR_SELECTOR:
      attr*: string # attribute name
      value*: string # value to compare against; empty for a presence test
      # ' ' for a presence test, '=' for `=`, otherwise the first character
      # of a two-character operator (one of ~ | ^ $ *).
      rel*: char
    of CLASS_SELECTOR:
      class*: string
    of UNIVERSAL_SELECTOR: #TODO namespaces?
      discard
    of PSEUDO_SELECTOR:
      pseudo*: PseudoData
    of PSELEM_SELECTOR:
      elem*: PseudoElem

  # Payload of a pseudo-class selector.
  PseudoData* = object
    case t*: PseudoClass
    of PSEUDO_NTH_CHILD, PSEUDO_NTH_LAST_CHILD:
      anb*: CSSAnB # the An+B expression
      ofsels*: SelectorList # selectors after `of`; empty when absent
    of PSEUDO_IS, PSEUDO_WHERE, PSEUDO_NOT:
      fsels*: SelectorList # the function's selector list argument
    of PSEUDO_LANG:
      s*: string # the identifier given to :lang()
    else: discard

  # A run of simple selectors that are not separated by a combinator.
  CompoundSelector* = object
    ct*: CombinatorType # relation to the next entry in a ComplexSelector.
    sels*: seq[Selector]

  # Compound selectors chained by combinators, in source order.
  ComplexSelector* = seq[CompoundSelector]

  # Comma-separated list of complex selectors.
  SelectorList* = seq[ComplexSelector]

iterator items*(sels: CompoundSelector): Selector {.inline.} =
  ## Yields the simple selectors of `sels` in order.
  for idx in 0 ..< sels.sels.len:
    yield sels.sels[idx]

func `[]`*(sels: CompoundSelector, i: int): Selector {.inline.} =
  ## Returns the simple selector at zero-based position `i`.
  sels.sels[i]

func `[]`*(sels: CompoundSelector, i: BackwardsIndex): Selector {.inline.} =
  ## Returns the simple selector at position `i` counted from the end
  ## (`sels[^1]` is the last one).
  sels.sels[i]

func len*(sels: CompoundSelector): int {.inline.} =
  ## Number of simple selectors in the compound selector.
  len(sels.sels)

proc add*(sels: var CompoundSelector, sel: Selector) {.inline.} =
  ## Appends the simple selector `sel` to the compound selector.
  add(sels.sels, sel)

# For debugging
func tostr(ftype: enum): string =
  ## Turns an enum value name such as `PSEUDO_FIRST_CHILD` into
  ## `first-child`: the part before the first underscore is dropped, the
  ## remaining parts are joined with '-' and lowercased.
  let parts = ($ftype).split('_')
  return parts[1..^1].join("-").toLower()

func `$`*(cxsel: ComplexSelector): string

func `$`*(sel: Selector): string =
  ## Renders a simple selector as CSS-like text (intended for debugging).
  ##
  ## Attribute values are emitted unquoted. Functional pseudo-classes
  ## include their arguments; An+B always carries an explicit sign on the
  ## B term so that the output reads e.g. `2n+1` rather than `2n1`.
  case sel.t
  of TYPE_SELECTOR:
    return tagName(sel.tag)
  of UNKNOWN_TYPE_SELECTOR:
    return sel.tagstr
  of ID_SELECTOR:
    return '#' & sel.id
  of ATTR_SELECTOR:
    # rel is ' ' for a presence test, '=' for plain equality, otherwise the
    # first character of a two-character operator such as `~=`.
    var rel = ""
    if sel.rel == '=':
      rel = "="
    elif sel.rel == ' ':
      discard
    else:
      rel = sel.rel & '='
    return '[' & sel.attr & rel & sel.value & ']'
  of CLASS_SELECTOR:
    return '.' & sel.class
  of UNIVERSAL_SELECTOR:
    return "*"
  of PSEUDO_SELECTOR:
    result = ':' & sel.pseudo.t.tostr()
    case sel.pseudo.t
    of PSEUDO_IS, PSEUDO_NOT, PSEUDO_WHERE:
      result &= '('
      # Separate by position; comparing each entry with the last one by
      # value would drop separators if an equal entry occurred earlier.
      for i, fsel in sel.pseudo.fsels:
        if i > 0:
          result &= ", "
        result &= $fsel
      result &= ')'
    of PSEUDO_NTH_CHILD, PSEUDO_NTH_LAST_CHILD:
      let offset = sel.pseudo.anb.B
      result &= '(' & $sel.pseudo.anb.A & 'n'
      # `$` only prints a sign for negative numbers.
      if offset >= 0:
        result &= '+'
      result &= $offset
      if sel.pseudo.ofsels.len != 0:
        result &= " of "
        for i, fsel in sel.pseudo.ofsels:
          if i > 0:
            result &= ','
          result &= $fsel
      result &= ')'
    of PSEUDO_LANG:
      result &= '(' & sel.pseudo.s & ')'
    else: discard
  of PSELEM_SELECTOR:
    return "::" & sel.elem.tostr()

func `$`*(sels: CompoundSelector): string =
  ## Concatenates the textual form of every simple selector in `sels`.
  result = ""
  for simple in sels.sels:
    result.add($simple)

func `$`*(cxsel: ComplexSelector): string =
  ## Renders each compound selector followed by the textual form of the
  ## combinator that links it to the next one.
  for compound in cxsel:
    result.add($compound)
    let joiner =
      case compound.ct
      of NO_COMBINATOR: ""
      of DESCENDANT_COMBINATOR: " "
      of CHILD_COMBINATOR: " > "
      of NEXT_SIBLING_COMBINATOR: " + "
      of SUBSEQ_SIBLING_COMBINATOR: " ~ "
    result.add(joiner)

func `$`*(slist: SelectorList): string =
  ## Renders the complex selectors of `slist` separated by ", ".
  for idx, cxsel in slist:
    if idx > 0:
      result.add(", ")
    result.add($cxsel)

func getSpecificity*(cxsel: ComplexSelector): int

func getSpecificity(sel: Selector): int =
  ## Specificity of one simple selector, packed into a single int:
  ## 1000000 per ID, 1000 per class/attribute/pseudo-class, 1 per
  ## type selector or pseudo-element.
  case sel.t
  of ID_SELECTOR:
    result = 1000000
  of CLASS_SELECTOR, ATTR_SELECTOR:
    result = 1000
  of TYPE_SELECTOR, UNKNOWN_TYPE_SELECTOR, PSELEM_SELECTOR:
    result = 1
  of UNIVERSAL_SELECTOR:
    result = 0
  of PSEUDO_SELECTOR:
    case sel.pseudo.t
    of PSEUDO_WHERE:
      # :where() contributes nothing.
      result = 0
    of PSEUDO_IS, PSEUDO_NOT:
      # Takes the specificity of the most specific argument.
      for arg in sel.pseudo.fsels:
        result = max(result, getSpecificity(arg))
    of PSEUDO_NTH_CHILD, PSEUDO_NTH_LAST_CHILD:
      # One pseudo-class plus the most specific `of` selector (if any).
      for arg in sel.pseudo.ofsels:
        result = max(result, getSpecificity(arg))
      result += 1000
    else:
      result = 1000

func getSpecificity*(sels: CompoundSelector): int =
  ## Sum of the specificities of all simple selectors in `sels`.
  result = 0
  for idx in 0 ..< sels.len:
    result += getSpecificity(sels[idx])

func getSpecificity*(cxsel: ComplexSelector): int =
  ## Sum of the specificities of all compound selectors in `cxsel`.
  result = 0
  for compound in cxsel:
    inc(result, getSpecificity(compound))

func pseudo*(cxsel: ComplexSelector): PseudoElem =
  ## Returns the pseudo-element targeted by `cxsel`, i.e. the element of its
  ## very last simple selector when that is a pseudo-element selector, and
  ## PSEUDO_NONE otherwise.
  ##
  ## An empty complex selector, or one whose last compound selector is
  ## empty, yields PSEUDO_NONE instead of raising an IndexDefect.
  if cxsel.len == 0 or cxsel[^1].len == 0:
    return PSEUDO_NONE
  let last = cxsel[^1][^1]
  if last.t == PSELEM_SELECTOR:
    return last.elem
  return PSEUDO_NONE

proc consume(state: var SelectorParser): CSSComponentValue =
  ## Returns the component value under the cursor and advances the cursor
  ## by one. Performs no bounds check of its own.
  let current = state.cvals[state.at]
  state.at += 1
  return current

proc has(state: var SelectorParser, i = 0): bool =
  ## True when the parser has not failed and a component value exists `i`
  ## positions past the cursor.
  if state.failed:
    return false
  return state.at + i < state.cvals.len

proc peek(state: var SelectorParser, i = 0): CSSComponentValue =
  ## Returns the component value `i` positions past the cursor without
  ## moving the cursor. Performs no bounds check of its own.
  let idx = state.at + i
  result = state.cvals[idx]

# Aborts the proc it is used in: sets `failed` on whatever `state` names at
# the use site, then returns from that proc with its current `result`.
# Requires a variable called `state` to be in scope where it is expanded.
template fail() =
  state.failed = true
  return

# Evaluates `cval` once and yields it converted to CSSToken. When the value
# is not a CSSToken, `fail` is invoked instead, so the calling proc returns.
template get_tok(cval: CSSComponentValue): CSSToken =
  let c = cval
  if not (c of CSSToken): fail
  CSSToken(c)

proc parseSelectorList(cvals: seq[CSSComponentValue]): SelectorList

# Functions that may contain other selectors, functions, etc.
proc parseRecursiveSelectorFunction(state: var SelectorParser, class: PseudoClass, body: seq[CSSComponentValue]): Selector =
  ## Builds a pseudo-class selector of kind `class` whose argument is the
  ## selector list parsed from `body`. Fails (nil result, `state` marked
  ## failed) when `body` yields no selectors.
  let inner = parseSelectorList(body)
  if inner.len == 0: fail
  result = Selector(
    t: PSEUDO_SELECTOR,
    pseudo: PseudoData(t: class),
  )
  result.pseudo.fsels = inner

proc parseNthChild(state: var SelectorParser, cssfunction: CSSFunction, data: PseudoData): Selector =
  ## Parses the argument of `:nth-child()` / `:nth-last-child()`:
  ## an An+B expression, optionally followed by `of <selector-list>`.
  ##
  ## `data` supplies the pseudo-class kind; its `anb` (and `ofsels` when an
  ## `of` clause is present) are filled in on the returned selector.
  ## Fails (nil result, `state` marked failed) when An+B is invalid, when
  ## something other than the identifier `of` follows it, or when the `of`
  ## clause has no valid selector list.
  var data = data
  var (anb, i) = parseAnB(cssfunction.value)
  if anb.isNone: fail
  data.anb = anb.get
  var nthchild = Selector(t: PSEUDO_SELECTOR, pseudo: data)
  while i < cssfunction.value.len and cssfunction.value[i] == CSS_WHITESPACE_TOKEN:
    inc i
  if i >= cssfunction.value.len:
    return nthchild # plain An+B, no `of` clause
  let keyword = get_tok cssfunction.value[i]
  if keyword.tokenType != CSS_IDENT_TOKEN or
      keyword.value.toLowerAscii() != "of":
    fail
  # Step past `of` itself; otherwise it would be handed to the selector list
  # parser and be read as a type selector named "of".
  inc i
  while i < cssfunction.value.len and cssfunction.value[i] == CSS_WHITESPACE_TOKEN:
    inc i
  if i >= cssfunction.value.len: fail # `of` with nothing after it
  nthchild.pseudo.ofsels = parseSelectorList(cssfunction.value[i..^1])
  if nthchild.pseudo.ofsels.len == 0: fail
  return nthchild

proc skipWhitespace(state: var SelectorParser) =
  ## Advances the cursor past any run of whitespace tokens.
  while state.has():
    let cval = state.peek()
    if not (cval of CSSToken):
      break
    if CSSToken(cval).tokenType != CSS_WHITESPACE_TOKEN:
      break
    inc state.at

proc parseLang(cvals: seq[CSSComponentValue]): Selector =
  ## Parses the argument of `:lang()`: the first non-whitespace component
  ## value must be an identifier, which becomes the language string.
  ## Anything after that identifier is not examined.
  ##
  ## Uses a parser state of its own, so failure is reported to the caller
  ## only through a nil result.
  var state = SelectorParser(cvals: cvals)
  state.skipWhitespace()
  if state.has():
    let tok = get_tok state.consume()
    if tok.tokenType == CSS_IDENT_TOKEN:
      return Selector(t: PSEUDO_SELECTOR, pseudo: PseudoData(t: PSEUDO_LANG, s: tok.value))
  fail

proc parseSelectorFunction(state: var SelectorParser, cssfunction: CSSFunction): Selector =
  ## Dispatches a functional pseudo-class (`:not()`, `:is()`, `:where()`,
  ## `:nth-child()`, `:nth-last-child()`, `:lang()`) to its parser.
  ## The function name is matched ASCII case-insensitively.
  ## Fails (nil result, `state` marked failed) for unknown functions and
  ## whenever the selected parser fails.
  case cssfunction.name.toLowerAscii()
  of "not": return state.parseRecursiveSelectorFunction(PSEUDO_NOT, cssfunction.value)
  of "is": return state.parseRecursiveSelectorFunction(PSEUDO_IS, cssfunction.value)
  of "where": return state.parseRecursiveSelectorFunction(PSEUDO_WHERE, cssfunction.value)
  of "nth-child": return state.parseNthChild(cssfunction, PseudoData(t: PSEUDO_NTH_CHILD))
  of "nth-last-child": return state.parseNthChild(cssfunction, PseudoData(t: PSEUDO_NTH_LAST_CHILD))
  of "lang":
    # parseLang works on a parser state of its own, so its failure shows up
    # only as a nil result. Record it on our state too; otherwise a nil
    # Selector would be added to the compound selector as if it were valid.
    result = parseLang(cssfunction.value)
    if result.isNil: fail
  else: fail

proc parsePseudoSelector(state: var SelectorParser): Selector =
  ## Parses what follows a ':' that the caller has already consumed:
  ## * an identifier - a pseudo-class, or the single-colon spelling of
  ##   `before` / `after`;
  ## * another ':' followed by an identifier - a pseudo-element;
  ## * a function - a functional pseudo-class.
  ## Names are matched ASCII case-insensitively, in line with how type
  ## selectors are lowercased by parseCompoundSelector.
  ## Fails (nil result, `state` marked failed) on anything else.
  if not state.has(): fail
  let cval = state.consume()
  if cval of CSSToken:
    template add_pseudo_element(element: PseudoElem) =
      return Selector(t: PSELEM_SELECTOR, elem: element)
    let tok = CSSToken(cval)
    case tok.tokenType
    of CSS_IDENT_TOKEN:
      template add_pseudo_class(class: PseudoClass) =
        return Selector(t: PSEUDO_SELECTOR, pseudo: PseudoData(t: class))
      case tok.value.toLowerAscii()
      of "before": add_pseudo_element PSEUDO_BEFORE
      of "after": add_pseudo_element PSEUDO_AFTER
      of "first-child": add_pseudo_class PSEUDO_FIRST_CHILD
      of "last-child": add_pseudo_class PSEUDO_LAST_CHILD
      of "only-child": add_pseudo_class PSEUDO_ONLY_CHILD
      of "hover": add_pseudo_class PSEUDO_HOVER
      of "root": add_pseudo_class PSEUDO_ROOT
      of "checked": add_pseudo_class PSEUDO_CHECKED
      of "focus": add_pseudo_class PSEUDO_FOCUS
      of "link": add_pseudo_class PSEUDO_LINK
      of "visited": add_pseudo_class PSEUDO_VISITED
      else: fail
    of CSS_COLON_TOKEN:
      # Second colon: a `::name` pseudo-element.
      if not state.has(): fail
      let tok = get_tok state.consume()
      if tok.tokenType != CSS_IDENT_TOKEN: fail
      case tok.value.toLowerAscii()
      of "before": add_pseudo_element PSEUDO_BEFORE
      of "after": add_pseudo_element PSEUDO_AFTER
      else: fail
    else: fail
  elif cval of CSSFunction:
    return state.parseSelectorFunction(CSSFunction(cval))
  else: fail

proc parseComplexSelector(state: var SelectorParser): ComplexSelector

proc parseAttributeSelector(state: var SelectorParser, cssblock: CSSSimpleBlock): Selector =
  ## Parses the contents of a `[...]` block into an attribute selector.
  ##
  ## Accepted forms are `[attr]` (rel ' ') and `[attr OP value]`, where OP
  ## is `=` or one of `~= |= ^= $= *=` and value is an identifier or a
  ## string. For two-character operators `rel` holds the first character.
  ## Fails (nil result, `state` marked failed) on any other input,
  ## including an operator that is cut short or not terminated by `=`.
  if cssblock.token.tokenType != CSS_LBRACKET_TOKEN: fail
  # The block's contents are scanned with a cursor of their own; `fail`
  # still reports to the caller's `state`.
  var state2 = SelectorParser(cvals: cssblock.value)
  state2.skipWhitespace()
  if not state2.has(): fail
  let attr = get_tok state2.consume()
  if attr.tokenType != CSS_IDENT_TOKEN: fail
  state2.skipWhitespace()
  if not state2.has(): return Selector(t: ATTR_SELECTOR, attr: attr.value, rel: ' ')
  let delim0 = get_tok state2.consume()
  if delim0.tokenType != CSS_DELIM_TOKEN: fail
  case delim0.rvalue
  of Rune('~'), Rune('|'), Rune('^'), Rune('$'), Rune('*'):
    # Two-character operator: the next token must exist and must be '='.
    if not state2.has(): fail
    let delim1 = get_tok state2.consume()
    if delim1.tokenType != CSS_DELIM_TOKEN or delim1.rvalue != Rune('='): fail
  of Rune('='):
    discard
  else: fail
  state2.skipWhitespace()
  if not state2.has(): fail
  let value = get_tok state2.consume()
  if value.tokenType notin {CSS_IDENT_TOKEN, CSS_STRING_TOKEN}: fail
  return Selector(t: ATTR_SELECTOR, attr: attr.value, value: value.value, rel: cast[char](delim0.rvalue))

proc parseClassSelector(state: var SelectorParser): Selector =
  ## Parses the identifier after a '.' that the caller has already consumed
  ## and returns the class selector. Fails (nil result, `state` marked
  ## failed) when the next component value is missing or not an identifier.
  if state.has():
    let tok = get_tok state.consume()
    if tok.tokenType == CSS_IDENT_TOKEN:
      return Selector(t: CLASS_SELECTOR, class: tok.value)
  fail

proc parseCompoundSelector(state: var SelectorParser): CompoundSelector =
  ## Collects consecutive simple selectors into one compound selector.
  ##
  ## Stops, without consuming it, at a comma, at a `>` / `+` / `~`
  ## combinator, or at whitespace that acts as a descendant combinator.
  ## Whitespace that is merely trailing, or that precedes a comma or an
  ## explicit combinator, is consumed before stopping.
  ## On unexpected input the parser is marked failed and whatever has been
  ## collected so far is returned.
  while state.has():
    let cval = state.peek()
    if cval of CSSToken:
      let tok = CSSToken(cval)
      case tok.tokenType
      of CSS_IDENT_TOKEN:
        # Type selector; the name is lowercased before tag lookup.
        inc state.at
        let s = tok.value.toLowerAscii()
        let tag = tagType(s)
        if tag == TAG_UNKNOWN:
          result.add(Selector(t: UNKNOWN_TYPE_SELECTOR, tagstr: s))
        else:
          result.add(Selector(t: TYPE_SELECTOR, tag: tag))
      of CSS_COLON_TOKEN:
        inc state.at
        result.add(state.parsePseudoSelector())
      of CSS_HASH_TOKEN:
        inc state.at
        result.add(Selector(t: ID_SELECTOR, id: tok.value))
      of CSS_COMMA_TOKEN: break
      of CSS_DELIM_TOKEN:
        case tok.rvalue
        of Rune('.'):
          inc state.at
          result.add(state.parseClassSelector())
        of Rune('*'):
          inc state.at
          result.add(Selector(t: UNIVERSAL_SELECTOR))
        # Combinators are left for parseComplexSelector to consume.
        of Rune('>'), Rune('+'), Rune('~'): break
        else: fail
      of CSS_WHITESPACE_TOKEN:
        # Whitespace always ends the compound selector. It is consumed here
        # only when it is not itself a descendant combinator: at the end of
        # the input, before a comma, or before an explicit combinator.
        if not state.has(1) or state.peek(1) == CSS_COMMA_TOKEN:
          inc state.at
        elif state.peek(1) == CSS_DELIM_TOKEN:
          let tok = CSSToken(state.peek(1))
          if tok.rvalue == Rune('>') or tok.rvalue == Rune('+') or tok.rvalue == Rune('~'):
            inc state.at
        break
      else: fail
    elif cval of CSSSimpleBlock:
      # A simple block here can only be an attribute selector; the callee
      # rejects blocks that do not start with '['.
      inc state.at
      result.add(state.parseAttributeSelector(CSSSimpleBlock(cval)))
    else:
      fail

proc parseComplexSelector(state: var SelectorParser): ComplexSelector =
  ## Parses compound selectors joined by combinators, up to and including
  ## the next comma or the end of the input.
  ##
  ## Each compound selector's `ct` records the combinator that follows it.
  ## The parser is marked failed when a compound selector is empty, when an
  ## unexpected token appears between compound selectors, or when the
  ## selector ends on a combinator.
  while true:
    state.skipWhitespace()
    let compound = state.parseCompoundSelector()
    result.add(compound)
    if compound.len == 0: fail
    if not state.has():
      break # finish
    let tok = get_tok state.consume()
    if tok.tokenType == CSS_COMMA_TOKEN:
      break # finish
    elif tok.tokenType == CSS_WHITESPACE_TOKEN:
      result[^1].ct = DESCENDANT_COMBINATOR
    elif tok.tokenType == CSS_DELIM_TOKEN:
      if tok.rvalue == Rune('>'):
        result[^1].ct = CHILD_COMBINATOR
      elif tok.rvalue == Rune('+'):
        result[^1].ct = NEXT_SIBLING_COMBINATOR
      elif tok.rvalue == Rune('~'):
        result[^1].ct = SUBSEQ_SIBLING_COMBINATOR
      else: fail
    else: fail
  if result.len == 0 or result[^1].ct != NO_COMBINATOR:
    fail

proc parseSelectorList(cvals: seq[CSSComponentValue]): SelectorList =
  ## Parses `cvals` as a comma-separated list of complex selectors.
  ## Returns an empty list when any part of the input fails to parse.
  var state = SelectorParser(cvals: cvals)
  var parsed: SelectorList
  while state.has():
    parsed.add(state.parseComplexSelector())
  if state.failed:
    return
  result = parsed

func parseSelectors*(cvals: seq[CSSComponentValue]): seq[ComplexSelector] =
  ## Parses a selector list from component values. Returns an empty seq
  ## when the input is not a valid selector list.
  {.cast(noSideEffect).}:
    result = parseSelectorList(cvals)

proc parseSelectors*(stream: Stream): seq[ComplexSelector] =
  ## Reads component values from `stream` and parses them as a selector
  ## list.
  let cvals = parseListOfComponentValues(stream)
  result = parseSelectors(cvals)