author     bptato <nincsnevem662@gmail.com>    2023-09-02 19:07:53 +0200
committer  bptato <nincsnevem662@gmail.com>    2023-09-02 19:10:37 +0200
commit     6250d670a54b4018ead2d3a51a77ac714ce983ce (patch)
tree       9c3eda6f4ffca7bd779b39d199ea92029271b8da
parent     54c46d249156bd29c591f23ff63d74e0c4159039 (diff)
download   chawan-6250d670a54b4018ead2d3a51a77ac714ce983ce.tar.gz
tokenizer: expose laststart
 chame/htmltokenizer.nim |  6 +++---
 tests/tokenizer.nim     | 14 ++++++++----
 2 files changed, 13 insertions(+), 7 deletions(-)
diff --git a/chame/htmltokenizer.nim b/chame/htmltokenizer.nim
index 0e0718ea..9dd05704 100644
--- a/chame/htmltokenizer.nim
+++ b/chame/htmltokenizer.nim
@@ -23,7 +23,7 @@ type
     tmp: string
     code: int
     tok: Token
-    laststart: Token
+    laststart*: Token
     attrn: string
     attrv: string
     attr: bool
@@ -226,7 +226,8 @@ iterator tokenize*(tokenizer: var Tokenizer): Token =
     if tokenizer.onParseError != nil:
       tokenizer.onParseError(error)
   template is_appropriate_end_tag_token(): bool =
-    tokenizer.laststart != nil and tokenizer.laststart.tagname == tokenizer.tok.tagname
+    tokenizer.laststart != nil and
+      tokenizer.laststart.tagname == tokenizer.tok.tagname
   template start_new_attribute =
     if tokenizer.attr:
       tokenizer.tok.attrs[tokenizer.attrn] = tokenizer.attrv
@@ -357,7 +358,6 @@ iterator tokenize*(tokenizer: var Tokenizer): Token =
 
   const null = char(0)
   while running:
-    #eprint tokenizer.state #debug
    let is_eof = tokenizer.atEof # set eof here, otherwise we would exit at the last character
    let r = if not is_eof:
      tokenizer.consume()
diff --git a/tests/tokenizer.nim b/tests/tokenizer.nim
index da5bbb5f..d8280572 100644
--- a/tests/tokenizer.nim
+++ b/tests/tokenizer.nim
@@ -65,12 +65,13 @@ proc checkEquals(tok, otok: Token, desc: string) =
     doAssert tok.selfclosing == otok.selfclosing, desc
     doAssert tok.attrs == otok.attrs, desc
   of TokenType.CHARACTER, TokenType.CHARACTER_WHITESPACE:
-    doAssert tok.s == otok.s, desc
+    doAssert tok.s == otok.s, desc & " (tok s: " & tok.s & " otok s: " &
+      otok.s & ")"
   of TokenType.COMMENT:
     doAssert tok.data == otok.data, desc
   of EOF: discard
 
-proc runTest(desc, input: string, output: seq[JsonNode],
+proc runTest(desc, input: string, output: seq[JsonNode], laststart: string,
     state: TokenizerState = DATA) =
   echo desc
   let ss = newStringStream(input)
@@ -78,6 +79,7 @@ proc runTest(desc, input: string, output: seq[JsonNode],
   proc onParseError(e: ParseError) =
     discard
   var tokenizer = newTokenizer(ds, onParseError)
+  tokenizer.laststart = Token(t: START_TAG, tagname: laststart)
   tokenizer.state = state
   var i = 0
   var chartok: Token = nil
@@ -124,12 +126,16 @@ proc runTests(filename: string) =
     let desc = t{"description"}.getStr()
     let input = t{"input"}.getStr()
     let output = t{"output"}.getElems()
+    let laststart = if "lastStartTag" in t:
+      t{"lastStartTag"}.getStr()
+    else:
+      ""
     if "initialStates" notin t:
-      runTest(desc, input, output)
+      runTest(desc, input, output, laststart)
     else:
      for state in t{"initialStates"}:
        let state = getState(state.getStr())
-        runTest(desc, input, output)
+        runTest(desc, input, output, laststart, state)
 
 test "contentModelFlags":
   runTests("contentModelFlags.test")
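Why expose laststart: the "appropriate end tag token" check compares an end tag's name against the last start tag the tokenizer emitted. When a caller starts the tokenizer mid-document in a state like RCDATA or RAWTEXT, no start tag has been emitted yet, so the caller has to seed laststart itself — which is exactly what the test harness above now does for the lastStartTag field of the tokenizer test files. A minimal usage sketch, assuming the state enum follows the HTML spec names (RCDATA) and eliding the decoder-stream setup (ds) and error callback the same way the test harness constructs them:

# Sketch: tokenize the contents of a <title> element starting mid-document.
# Constructor, field, and iterator names are taken from the diff above;
# processToken is a hypothetical consumer.
var tokenizer = newTokenizer(ds, onParseError)
# Seed the "appropriate end tag token" check: without this, </title> can
# never match laststart, and the tokenizer stays in RCDATA until EOF.
tokenizer.laststart = Token(t: START_TAG, tagname: "title")
tokenizer.state = RCDATA
for tok in tokenizer.tokenize:
  processToken(tok)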