summary refs log tree commit diff stats
diff options
context:
space:
mode:
author Araq <rumpf_a@web.de> 2019-09-04 11:20:30 +0200
committer Araq <rumpf_a@web.de> 2019-09-04 11:20:30 +0200
commit fa5707e7e2494065be09f4e4f0590140399cd215 (patch)
tree 2f257ca9bbf61eb51b8851b5c64a7b5162f80273
parent cb0450398a5695e1e1193e6c49ff9c7548980438 (diff)
download Nim-fa5707e7e2494065be09f4e4f0590140399cd215.tar.gz
fixes #11713, fixes #1034
-rw-r--r-- lib/pure/parsexml.nim 9
-rw-r--r-- tests/stdlib/thtmlparser.nim 20
2 files changed, 26 insertions, 3 deletions
diff --git a/lib/pure/parsexml.nim b/lib/pure/parsexml.nim
index 3b77f9c62..686aad110 100644
--- a/lib/pure/parsexml.nim
+++ b/lib/pure/parsexml.nim
@@ -472,16 +472,19 @@ proc parseEntity(my: var XmlParser, dest: var string) =
     inc(pos, 4)
   else:
     my.bufpos = pos
-    parseName(my, dest)
+    var name = ""
+    parseName(my, name)
     pos = my.bufpos
-    if my.err != errNameExpected:
+    if my.err != errNameExpected and my.buf[pos] == ';':
       my.kind = xmlEntity
     else:
       add(dest, '&')
+    add(dest, name)
   if my.buf[pos] == ';':
     inc(pos)
   else:
-    markError(my, errSemicolonExpected)
+    my.err = errSemicolonExpected
+    # do not overwrite 'my.state' here, it's a benign error
   my.bufpos = pos
 
 proc parsePI(my: var XmlParser) =
diff --git a/tests/stdlib/thtmlparser.nim b/tests/stdlib/thtmlparser.nim
index 0457585d0..ccf2f6202 100644
--- a/tests/stdlib/thtmlparser.nim
+++ b/tests/stdlib/thtmlparser.nim
@@ -2,6 +2,9 @@ discard """
   output: '''
 @[]
 true
+https://example.com/test?format=jpg&name=orig##
+https://example.com/test?format=jpg&name=orig##text
+https://example.com/test?format=jpg##text
 '''
 """
 import htmlparser
@@ -136,3 +139,20 @@ block t6154:
   doAssert ps[6].attr("quux") == ""
   doAssert ps[6].attr("whatever") == ""
   doassert ps[6].len == 0
+
+# bug #11713, #1034
+var content = """
+# with &
+<img src="https://example.com/test?format=jpg&name=orig" alt="">
+<img src="https://example.com/test?format=jpg&name=orig" alt="text">
+
+# without &
+<img src="https://example.com/test?format=jpg" alt="text">
+"""
+
+var
+  stream = newStringStream(content)
+  body = parseHtml(stream)
+
+for y in body.findAll("img"):
+  echo y.attr("src"), "##", y.attr("alt")