author     bptato <nincsnevem662@gmail.com>  2023-09-02 19:07:53 +0200
committer  bptato <nincsnevem662@gmail.com>  2023-09-02 19:10:37 +0200
commit     6250d670a54b4018ead2d3a51a77ac714ce983ce (patch)
tree       9c3eda6f4ffca7bd779b39d199ea92029271b8da
parent     54c46d249156bd29c591f23ff63d74e0c4159039 (diff)
download   chawan-6250d670a54b4018ead2d3a51a77ac714ce983ce.tar.gz
tokenizer: expose laststart
-rw-r--r--  chame/htmltokenizer.nim  |  6
-rw-r--r--  tests/tokenizer.nim      | 14
2 files changed, 13 insertions(+), 7 deletions(-)
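
Context for the change: the HTML5 tokenizer only treats an end tag in RCDATA,
RAWTEXT, and script-data content as an "appropriate end tag token" when its
tag name matches the most recently seen start tag, which the tokenizer tracks
in laststart. Exporting the field lets a caller (here, the test harness) seed
that state before tokenizing. A minimal usage sketch, assuming RCDATA is a
member of TokenizerState, with the decoder stream ds constructed from the
input as in tests/tokenizer.nim (construction elided here):

    # Sketch only: stream setup elided, as in the tests.
    proc onParseError(e: ParseError) = discard
    var tokenizer = newTokenizer(ds, onParseError)
    # Pretend a <title> start tag was already seen, so that a later
    # </title> in RCDATA counts as an appropriate end tag token.
    tokenizer.laststart = Token(t: START_TAG, tagname: "title")
    tokenizer.state = RCDATA # assumption: RCDATA is a TokenizerState member
    for tok in tokenizer.tokenize():
      echo tok.t
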
diff --git a/chame/htmltokenizer.nim b/chame/htmltokenizer.nim
index 0e0718ea..9dd05704 100644
--- a/chame/htmltokenizer.nim
+++ b/chame/htmltokenizer.nim
@@ -23,7 +23,7 @@ type
     tmp: string
     code: int
     tok: Token
-    laststart: Token
+    laststart*: Token
     attrn: string
     attrv: string
     attr: bool
@@ -226,7 +226,8 @@ iterator tokenize*(tokenizer: var Tokenizer): Token =
     if tokenizer.onParseError != nil:
       tokenizer.onParseError(error)
   template is_appropriate_end_tag_token(): bool =
-    tokenizer.laststart != nil and tokenizer.laststart.tagname == tokenizer.tok.tagname
+    tokenizer.laststart != nil and
+      tokenizer.laststart.tagname == tokenizer.tok.tagname
   template start_new_attribute =
     if tokenizer.attr:
       tokenizer.tok.attrs[tokenizer.attrn] = tokenizer.attrv
@@ -357,7 +358,6 @@ iterator tokenize*(tokenizer: var Tokenizer): Token =
   const null = char(0)
 
   while running:
-    #eprint tokenizer.state #debug
     let is_eof = tokenizer.atEof # set eof here, otherwise we would exit at the last character
     let r = if not is_eof:
       tokenizer.consume()
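
The is_appropriate_end_tag_token template above is what the exported field
feeds into. The behavior this implies, per the HTML5 spec (a hedged
illustration, not output captured from the test suite):

    # input "</xmp>" in RCDATA with laststart = <xmp>:
    #   tag names match -> emitted as an end-tag token
    # input "</xmp>" in RCDATA with laststart = <p> (or nil):
    #   no match -> "</xmp>" is emitted as CHARACTER data instead
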
diff --git a/tests/tokenizer.nim b/tests/tokenizer.nim
index da5bbb5f..d8280572 100644
--- a/tests/tokenizer.nim
+++ b/tests/tokenizer.nim
@@ -65,12 +65,13 @@ proc checkEquals(tok, otok: Token, desc: string) =
     doAssert tok.selfclosing == otok.selfclosing, desc
     doAssert tok.attrs == otok.attrs, desc
   of TokenType.CHARACTER, TokenType.CHARACTER_WHITESPACE:
-    doAssert tok.s == otok.s, desc
+    doAssert tok.s == otok.s, desc & " (tok s: " & tok.s & " otok s: " &
+      otok.s & ")"
   of TokenType.COMMENT:
     doAssert tok.data == otok.data, desc
   of EOF: discard
 
-proc runTest(desc, input: string, output: seq[JsonNode],
+proc runTest(desc, input: string, output: seq[JsonNode], laststart: string,
     state: TokenizerState = DATA) =
   echo desc
   let ss = newStringStream(input)
@@ -78,6 +79,7 @@ proc runTest(desc, input: string, output: seq[JsonNode],
   proc onParseError(e: ParseError) =
     discard
   var tokenizer = newTokenizer(ds, onParseError)
+  tokenizer.laststart = Token(t: START_TAG, tagname: laststart)
   tokenizer.state = state
   var i = 0
   var chartok: Token = nil
@@ -124,12 +126,16 @@ proc runTests(filename: string) =
     let desc = t{"description"}.getStr()
     let input = t{"input"}.getStr()
     let output = t{"output"}.getElems()
+    let laststart = if "lastStartTag" in t:
+      t{"lastStartTag"}.getStr()
+    else:
+      ""
     if "initialStates" notin t:
-      runTest(desc, input, output)
+      runTest(desc, input, output, laststart)
     else:
       for state in t{"initialStates"}:
         let state = getState(state.getStr())
-        runTest(desc, input, output)
+        runTest(desc, input, output, laststart, state)
 
 test "contentModelFlags":
   runTests("contentModelFlags.test")
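
For reference, the new laststart parameter serves html5lib-tests-style JSON
entries carrying a "lastStartTag" key, as read by runTests above. An
illustrative entry in that shape (not a verbatim case from
contentModelFlags.test):

    {"description": "End tag closing RCDATA",
     "initialStates": ["RCDATA state"],
     "lastStartTag": "xmp",
     "input": "foo</xmp>",
     "output": [["Character", "foo"], ["EndTag", "xmp"]]}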