From 61f31f07c04081943382d94554b9eef5aaee1712 Mon Sep 17 00:00:00 2001 From: bptato Date: Sun, 19 Dec 2021 14:13:40 +0100 Subject: Fix html parser bugs --- src/html/parser.nim | 13 +++++++------ src/utils/twtstr.nim | 3 +++ 2 files changed, 10 insertions(+), 6 deletions(-) diff --git a/src/html/parser.nim b/src/html/parser.nim index 8809e392..8c92b5d6 100644 --- a/src/html/parser.nim +++ b/src/html/parser.nim @@ -117,9 +117,9 @@ proc parse_tag(buf: string, at: var int): DOMParsedTag = tag.open = false at = skipBlanks(buf, at) - while at < buf.len and not buf[at].isWhitespace() and not (tag.open and buf[at] == '/') and buf[at] != '>': + while at < buf.len and not buf[at].isWhitespace() and not (tag.open and buf[at] == '/') and buf[at] != '>' and buf[at].isAscii(): tagname &= buf[at].tolower() - at += buf.runeLenAt(at) + inc at tag.tagid = tagType(tagname) at = skipBlanks(buf, at) @@ -153,14 +153,15 @@ proc parse_tag(buf: string, at: var int): DOMParsedTag = inc at elif at < buf.len: while at < buf.len and not buf[at].isWhitespace() and buf[at] != '>': - value &= buf[at] - at += buf.runeLenAt(at) + var r: Rune + fastRuneAt(buf, at, r) + value &= $r if attrname.len > 0: tag.attrs[attrname] = value while at < buf.len and buf[at] != '>': - at += buf.runeLenAt(at) + inc at if at < buf.len and buf[at] == '>': inc at @@ -463,7 +464,7 @@ proc parseHtml*(inputStream: Stream): Document = of '>': till_when = false else: discard - at += lineBuf.runeLenAt(at) + inc at if till_when: continue diff --git a/src/utils/twtstr.nim b/src/utils/twtstr.nim index 52abade7..47d2c16f 100644 --- a/src/utils/twtstr.nim +++ b/src/utils/twtstr.nim @@ -138,6 +138,9 @@ func hexValue*(c: char): int = func decValue*(c: char): int = return decCharMap[int(c)] +func isAscii*(c: char): bool = + return int(c) < 128 + func isAscii*(r: Rune): bool = return int(r) < 128 -- cgit 1.4.1-2-gfad0