summary refs log tree commit diff stats
path: root/lib
diff options
context:
space:
mode:
Diffstat (limited to 'lib')
-rwxr-xr-x lib/pure/pegs.nim 16
-rwxr-xr-x lib/pure/re.nim 85
-rwxr-xr-x lib/pure/xmldom.nim 16
-rwxr-xr-x lib/pure/xmldomparser.nim 2
-rwxr-xr-x lib/pure/xmltree.nim 5
5 files changed, 67 insertions, 57 deletions
diff --git a/lib/pure/pegs.nim b/lib/pure/pegs.nim
index 2cfae3201..201f13111 100755
--- a/lib/pure/pegs.nim
+++ b/lib/pure/pegs.nim
@@ -604,7 +604,7 @@ proc match*(s: string, pattern: TPeg, matches: var openarray[string],
   ## match, nothing is written into ``matches`` and ``false`` is
   ## returned.
   var c: TMatchClosure
-  result = m(s, pattern, start, c) == len(s)
+  result = m(s, pattern, start, c) == len(s) -start
   if result:
     for i in 0..c.ml-1:
       matches[i] = copy(s, c.matches[i][0], c.matches[i][1])
@@ -612,7 +612,7 @@ proc match*(s: string, pattern: TPeg, matches: var openarray[string],
 proc match*(s: string, pattern: TPeg, start = 0): bool =
   ## returns ``true`` if ``s`` matches the ``pattern`` beginning from ``start``.
   var c: TMatchClosure
-  result = m(s, pattern, start, c) == len(s)
+  result = m(s, pattern, start, c) == len(s)-start
 
 proc matchLen*(s: string, pattern: TPeg, matches: var openarray[string],
                start = 0): int =
@@ -1263,8 +1263,8 @@ proc primary(p: var TPegParser): TPeg =
     of "D": result = charset({'\1'..'\xff'} - {'0'..'9'})
     of "s": result = charset({' ', '\9'..'\13'})
     of "S": result = charset({'\1'..'\xff'} - {' ', '\9'..'\13'})
-    of "w": result = charset({'a'..'z', 'A'..'Z', '_'})
-    of "W": result = charset({'\1'..'\xff'} - {'a'..'z', 'A'..'Z', '_'})
+    of "w": result = charset({'a'..'z', 'A'..'Z', '_', '0'..'9'})
+    of "W": result = charset({'\1'..'\xff'} - {'a'..'z','A'..'Z','_','0'..'9'})
     of "ident": result = pegs.ident
     else: pegError(p, "unknown built-in: " & p.tok.literal)
     getTok(p)
@@ -1435,3 +1435,11 @@ when isMainModule:
     assert matches[0] == "a"
   else:
     assert false
+    
+  var matches: array[0..5, string]
+  if match("abcdefg", peg"'c' {'d'} 'ef' {'g'}", matches, 2): 
+    assert matches[0] == "d"
+    assert matches[1] == "g"
+  else:
+    assert false
+
diff --git a/lib/pure/re.nim b/lib/pure/re.nim
index 1964bf200..b4aa75637 100755
--- a/lib/pure/re.nim
+++ b/lib/pure/re.nim
@@ -75,12 +75,14 @@ proc matchOrFind(s: string, pattern: TRegEx, matches: var openarray[string],
     var b = rawMatches[i * 2 + 1]
     if a >= 0'i32: matches[i-1] = copy(s, int(a), int(b)-1)
     else: matches[i-1] = ""
-  return res
+  return rawMatches[1] - rawMatches[0]
 
 proc matchOrFind(s: string, pattern: TRegEx, start, flags: cint): cint =
   var rawMatches: array [0..maxSubpatterns * 3 - 1, cint]
-  return pcreExec(pattern.h, nil, s, len(s), start, flags,
-                  cast[ptr cint](addr(rawMatches)), maxSubpatterns * 3)
+  result = pcreExec(pattern.h, nil, s, len(s), start, flags,
+                    cast[ptr cint](addr(rawMatches)), maxSubpatterns * 3)
+  if result >= 0'i32:
+    result = rawMatches[1] - rawMatches[0]
 
 proc match*(s: string, pattern: TRegEx, matches: var openarray[string],
            start = 0): bool =
@@ -88,11 +90,12 @@ proc match*(s: string, pattern: TRegEx, matches: var openarray[string],
   ## the captured substrings in the array ``matches``. If it does not
   ## match, nothing is written into ``matches`` and ``false`` is
   ## returned.
-  return matchOrFind(s, pattern, matches, start, PCRE_ANCHORED) >= 0'i32
+  return matchOrFind(s, pattern, matches, start, 
+                     PCRE_ANCHORED) == cint(s.len - start)
 
 proc match*(s: string, pattern: TRegEx, start = 0): bool =
   ## returns ``true`` if ``s[start..]`` matches the ``pattern``.
-  return matchOrFind(s, pattern, start, PCRE_ANCHORED) >= 0'i32
+  return matchOrFind(s, pattern, start, PCRE_ANCHORED) == cint(s.len - start)
 
 proc matchLen*(s: string, pattern: TRegEx, matches: var openarray[string],
               start = 0): int =
@@ -112,12 +115,23 @@ proc find*(s: string, pattern: TRegEx, matches: var openarray[string],
   ## returns the starting position of ``pattern`` in ``s`` and the captured
   ## substrings in the array ``matches``. If it does not match, nothing
   ## is written into ``matches`` and -1 is returned.
-  return matchOrFind(s, pattern, matches, start, 0'i32)
+  var
+    rawMatches: array[0..maxSubpatterns * 3 - 1, cint]
+    res = pcreExec(pattern.h, nil, s, len(s), start, 0'i32,
+      cast[ptr cint](addr(rawMatches)), maxSubpatterns * 3)
+  if res < 0'i32: return res
+  for i in 1..int(res)-1:
+    var a = rawMatches[i * 2]
+    var b = rawMatches[i * 2 + 1]
+    if a >= 0'i32: matches[i-1] = copy(s, int(a), int(b)-1)
+    else: matches[i-1] = ""
+  return rawMatches[0]
 
 proc find*(s: string, pattern: TRegEx, start = 0): int =
   ## returns the starting position of ``pattern`` in ``s``. If it does not
   ## match, -1 is returned.
-  return matchOrFind(s, pattern, start, 0'i32)
+  var matches: array[0..maxSubpatterns-1, string]
+  result = find(s, pattern, matches, start)
 
 template `=~` *(s: string, pattern: TRegEx): expr = 
   ## This calls ``match`` with an implicit declared ``matches`` array that 
@@ -279,57 +293,36 @@ const ## common regular expressions
     ## describes an URL
 
 when isMainModule:
-  assert match("(a b c)", re"'(' @ ')'")
-  assert match("WHiLe", re(r"while", {reIgnoreCase}))
+  assert match("(a b c)", re"\( .* \)")
+  assert match("WHiLe", re("while", {reIgnoreCase}))
   
   assert "0158787".match(re"\d+")
   assert "ABC 0232".match(re"\w+\s+\d+")
-  assert "ABC".match(re"\d+ / \w+")
-
-  for word in split("00232this02939is39an22example111", re"\d+"):
-    writeln(stdout, word)
+  assert "ABC".match(re"\d+ | \w+")
 
   assert matchLen("key", re(reIdentifier)) == 3
 
-  var pattern = re"[a-z0-9]+\s*=\s*[a-z0-9]+")
+  var pattern = re"[a-z0-9]+\s*=\s*[a-z0-9]+"
   assert matchLen("key1=  cal9", pattern) == 11
   
-  var c: TMatchClosure
-  var s = "a+b +  c +d+e+f"
-  assert m(s, expr.rule, 0, c) == len(s)
-  var a = ""
-  for i in 0..c.ml-1:
-    a.add(copy(s, c.matches[i][0], c.matches[i][1]))
-  assert a == "abcdef"
-  #echo expr.rule
-
-  #const filename = "lib/devel/peg/grammar.txt"
-  #var grammar = parsePeg(newFileStream(filename, fmRead), filename)
-  #echo "a <- [abc]*?".match(grammar)
-  assert find("_____abc_______", term("abc")) == 5
-  assert match("_______ana", peg"A <- 'ana' / . A")
-  assert match("abcs%%%", peg"A <- ..A / .A / '%'")
-
-  if "abc" =~ peg"{'a'}'bc' 'xyz' / {\ident}":
-    assert matches[0] == "abc"
+  assert find("_____abc_______", re"abc") == 5
+  
+  var matches: array[0..5, string]
+  if match("abcdefg", re"c(d)ef(g)", matches, 2): 
+    assert matches[0] == "d"
+    assert matches[1] == "g"
   else:
     assert false
   
-  var g2 = peg"""S <- A B / C D
-                 A <- 'a'+
-                 B <- 'b'+
-                 C <- 'c'+
-                 D <- 'd'+
-              """
-  assert($g2 == "((A B) / (C D))")
-  assert match("cccccdddddd", g2)
-  assert("var1=key; var2=key2".replace(peg"{\ident}'='{\ident}", "$1<-$2$2") ==
-         "var1<-keykey; var2<-key2key2")
-  assert "var1=key; var2=key2".endsWith(peg"{\ident}'='{\ident}")
-
-  if "aaaaaa" =~ peg"'aa' !. / ({'a'})+":
-    assert matches[0] == "a"
+  if "abc" =~ re"(a)bcxyz|(\w+)":
+    assert matches[1] == "abc"
   else:
     assert false
+    
+  assert "var1=key; var2=key2".endsWith(re"\w+=\w+")
+  assert("var1=key; var2=key2".replace(re"(\w+)=(\w+)", "$1<-$2$2") ==
+         "var1<-keykey; var2<-key2key2")
 
+  for word in split("00232this02939is39an22example111", re"\d+"):
+    writeln(stdout, word)
 
diff --git a/lib/pure/xmldom.nim b/lib/pure/xmldom.nim
index babf60108..b7ee165f5 100755
--- a/lib/pure/xmldom.nim
+++ b/lib/pure/xmldom.nim
@@ -1044,10 +1044,20 @@ proc target*(PI: PProcessingInstruction): string =
     
 # --Other stuff--
 # Writer
+proc addEscaped(s: string): string = 
+  result = ""
+  for c in items(s):
+    case c
+    of '<': result.add("&lt;")
+    of '>': result.add("&gt;")
+    of '&': result.add("&amp;")
+    of '"': result.add("&quot;")
+    else: result.add(c)
+
 proc nodeToXml(n: PNode, indent: int = 0): string =
   result = repeatChar(indent, ' ') & "<" & n.nodeName
   for i in items(n.Attributes):
-    result.add(" " & i.name & "=\"" & i.value & "\"")
+    result.add(" " & i.name & "=\"" & addEscaped(i.value) & "\"")
   
   if n.childNodes.len() == 0:
     result.add("/>") # No idea why this doesn't need a \n :O
@@ -1060,7 +1070,7 @@ proc nodeToXml(n: PNode, indent: int = 0): string =
         result.add(nodeToXml(i, indent + 2))
       of TextNode:
         result.add(repeatChar(indent * 2, ' '))
-        result.add(i.nodeValue)
+        result.add(addEscaped(i.nodeValue))
       of CDataSectionNode:
         result.add(repeatChar(indent * 2, ' '))
         result.add("<![CDATA[" & i.nodeValue & "]]>")
@@ -1080,4 +1090,4 @@ proc nodeToXml(n: PNode, indent: int = 0): string =
 proc `$`*(doc: PDocument): string =
   ## Converts a PDocument object into a string representation of it's XML
   result = "<?xml version=\"1.0\" encoding=\"UTF-8\" ?>\n"
-  result.add(nodeToXml(doc.documentElement))
\ No newline at end of file
+  result.add(nodeToXml(doc.documentElement))
diff --git a/lib/pure/xmldomparser.nim b/lib/pure/xmldomparser.nim
index f338ca2e5..d9eb210c3 100755
--- a/lib/pure/xmldomparser.nim
+++ b/lib/pure/xmldomparser.nim
@@ -165,4 +165,4 @@ when isMainModule:
       echo(i.nodeName, "=", i.namespaceURI)
 
     
-  echo($xml)
\ No newline at end of file
+  echo($xml)
diff --git a/lib/pure/xmltree.nim b/lib/pure/xmltree.nim
index 7b77fe156..c79b9ad40 100755
--- a/lib/pure/xmltree.nim
+++ b/lib/pure/xmltree.nim
@@ -223,9 +223,8 @@ proc newXmlTree*(tag: string, children: openArray[PXmlNode],
   result.fAttr = attributes
   
 proc xmlConstructor(e: PNimrodNode): PNimrodNode {.compileTime.} =
-  ## use this procedure to define a new XML tag
-  expectLen(e, 1)
-  var a = e[0]
+  expectLen(e, 2)
+  var a = e[1]
   if a.kind == nnkCall:
     result = newCall("newXmlTree", toStrLit(a[0]))
     var attrs = newCall("newStringTable", [])