summary refs log tree commit diff stats
diff options
context:
space:
mode:
author rec <44084068+recloser@users.noreply.github.com> 2018-10-30 06:58:39 +0100
committer Andreas Rumpf <rumpf_a@web.de> 2018-10-30 06:58:39 +0100
commit 9899c4525c9bedbdc3a8d7d4fbcbf8f85a4c474a (patch)
tree a6d13c17fedcabb3e5b92158d3bdc73fb434a145
parent 69c0a9c6fb688d382d83c165860006977dd6bf04 (diff)
download Nim-9899c4525c9bedbdc3a8d7d4fbcbf8f85a4c474a.tar.gz
Add parsing empty attribs to htmlparser (#9559)
-rw-r--r-- lib/pure/htmlparser.nim 3
-rw-r--r-- lib/pure/parsexml.nim 8
-rw-r--r-- tests/stdlib/thtmlparser.nim 15
3 files changed, 23 insertions, 3 deletions
diff --git a/lib/pure/htmlparser.nim b/lib/pure/htmlparser.nim
index 9e1a5a101..2d24050f2 100644
--- a/lib/pure/htmlparser.nim
+++ b/lib/pure/htmlparser.nim
@@ -2014,7 +2014,8 @@ proc parseHtml*(s: Stream, filename: string,
   ## Parses the XML from stream `s` and returns a ``XmlNode``. Every
   ## occurred parsing error is added to the `errors` sequence.
   var x: XmlParser
-  open(x, s, filename, {reportComments, reportWhitespace, allowUnquotedAttribs})
+  open(x, s, filename, {reportComments, reportWhitespace, allowUnquotedAttribs,
+    allowEmptyAttribs})
   next(x)
   # skip the DOCTYPE:
   if x.kind == xmlSpecial: next(x)
diff --git a/lib/pure/parsexml.nim b/lib/pure/parsexml.nim
index 39b117d40..0967f7983 100644
--- a/lib/pure/parsexml.nim
+++ b/lib/pure/parsexml.nim
@@ -189,6 +189,7 @@ type
     reportWhitespace,      ## report whitespace
     reportComments         ## report comments
     allowUnquotedAttribs   ## allow unquoted attribute values (for HTML)
+    allowEmptyAttribs      ## allow empty attributes (without explicit value)
 
   XmlParser* = object of BaseLexer ## the parser object.
     a, b, c: string
@@ -621,10 +622,15 @@ proc parseAttribute(my: var XmlParser) =
   if my.a.len == 0:
     markError(my, errGtExpected)
     return
+
+  let startPos = my.bufpos
   parseWhitespace(my, skip=true)
   if my.buf[my.bufpos] != '=':
-    markError(my, errEqExpected)
+    if allowEmptyAttribs notin my.options or
+        (my.buf[my.bufpos] != '>' and my.bufpos == startPos):
+      markError(my, errEqExpected)
     return
+
   inc(my.bufpos)
   parseWhitespace(my, skip=true)
 
diff --git a/tests/stdlib/thtmlparser.nim b/tests/stdlib/thtmlparser.nim
index 58b2d0377..0457585d0 100644
--- a/tests/stdlib/thtmlparser.nim
+++ b/tests/stdlib/thtmlparser.nim
@@ -92,6 +92,8 @@ block t6154:
         <p something=  &#9;foo&#9;bar&#178; foo  =bloo></p>
         <p class="foo2" id="bar2"></p>
         <p wrong= ></p>
+        <p data-foo data-bar="correct!" enabled  ></p>
+        <p quux whatever></p>
       </body>
   </html>
   """
@@ -100,7 +102,7 @@ block t6154:
   let html = parseHtml(newStringStream(foo), "statichtml", errors=errors)
   doAssert "statichtml(11, 18) Error: attribute value expected" in errors
   let ps = html.findAll("p")
-  doAssert ps.len == 5
+  doAssert ps.len == 7
 
   doAssert ps[0].attrsLen == 2
   doAssert ps[0].attr("class") == "foo"
@@ -123,3 +125,14 @@ block t6154:
 
   doAssert ps[4].attrsLen == 1
   doAssert ps[4].attr("wrong") == ""
+
+  doAssert ps[5].attrsLen == 3
+  doAssert ps[5].attr("data-foo") == ""
+  doAssert ps[5].attr("data-bar") == "correct!"
+  doAssert ps[5].attr("enabled") == ""
+  doassert ps[5].len == 0
+
+  doAssert ps[6].attrsLen == 2
+  doAssert ps[6].attr("quux") == ""
+  doAssert ps[6].attr("whatever") == ""
+  doassert ps[6].len == 0