summary refs log tree commit diff stats
diff options
context:
space:
mode:
-rwxr-xr-xconfig/nimrod.cfg2
-rwxr-xr-xdoc/lib.txt6
-rwxr-xr-xdoc/pegdocs.txt4
-rwxr-xr-xlib/pure/pegs.nim16
-rwxr-xr-xlib/pure/re.nim85
-rwxr-xr-xlib/pure/xmldom.nim16
-rwxr-xr-xlib/pure/xmldomparser.nim2
-rwxr-xr-xlib/pure/xmltree.nim5
-rwxr-xr-xtests/accept/run/spec.csv1
-rw-r--r--tests/accept/run/txmltree.nim7
-rwxr-xr-xweb/index.txt4
-rwxr-xr-xweb/news.txt3
-rwxr-xr-xweb/snippets/snippet1.nim2
13 files changed, 89 insertions, 64 deletions
diff --git a/config/nimrod.cfg b/config/nimrod.cfg
index 80d7a548d..7043f71fc 100755
--- a/config/nimrod.cfg
+++ b/config/nimrod.cfg
@@ -1,5 +1,5 @@
 # Configuration file for the Nimrod Compiler.
-# (c) 2009 Andreas Rumpf
+# (c) 2010 Andreas Rumpf
 
 # Feel free to edit the default values as you need.
 
diff --git a/doc/lib.txt b/doc/lib.txt
index b94ac987e..d2b327df7 100755
--- a/doc/lib.txt
+++ b/doc/lib.txt
@@ -55,7 +55,7 @@ String handling
 * `unicode <unicode.html>`_ 
   This module provides support to handle the Unicode UTF-8 encoding.
 
-* `regexprs <regexprs.html>`_
+* `re <re.html>`_
   This module contains procedures and operators for handling regular
   expressions. Consider using `pegs` instead.
 
@@ -229,6 +229,10 @@ Database support
 * `db_mysql <db_mysql.html>`_
   A higher level mySQL database wrapper. The same interface is implemented
   for other databases too.
+ 
+* `db_sqlite <db_sqlite.html>`_
+  A higher level SQLite database wrapper. The same interface is implemented
+  for other databases too.
 
 
 
diff --git a/doc/pegdocs.txt b/doc/pegdocs.txt
index 27eb19747..87b4e25bc 100755
--- a/doc/pegdocs.txt
+++ b/doc/pegdocs.txt
@@ -86,8 +86,8 @@ macro              meaning
 ``\s``             any whitespace character: ``[ \9-\13]``
 ``\S``             any character that is not a whitespace character:
                    ``[^ \9-\13]``
-``\w``             any "word" character: ``[a-zA-Z_]``
-``\W``             any "non-word" character: ``[^a-zA-Z_]``
+``\w``             any "word" character: ``[a-zA-Z0-9_]``
+``\W``             any "non-word" character: ``[^a-zA-Z0-9_]``
 ``\n``             any newline combination: ``\10 / \13\10 / \13``
 ``\i``             ignore case for matching; use this at the start of the PEG
 ``\y``             ignore style for matching; use this at the start of the PEG
diff --git a/lib/pure/pegs.nim b/lib/pure/pegs.nim
index 2cfae3201..201f13111 100755
--- a/lib/pure/pegs.nim
+++ b/lib/pure/pegs.nim
@@ -604,7 +604,7 @@ proc match*(s: string, pattern: TPeg, matches: var openarray[string],
   ## match, nothing is written into ``matches`` and ``false`` is
   ## returned.
   var c: TMatchClosure
-  result = m(s, pattern, start, c) == len(s)
+  result = m(s, pattern, start, c) == len(s) -start
   if result:
     for i in 0..c.ml-1:
       matches[i] = copy(s, c.matches[i][0], c.matches[i][1])
@@ -612,7 +612,7 @@ proc match*(s: string, pattern: TPeg, matches: var openarray[string],
 proc match*(s: string, pattern: TPeg, start = 0): bool =
   ## returns ``true`` if ``s`` matches the ``pattern`` beginning from ``start``.
   var c: TMatchClosure
-  result = m(s, pattern, start, c) == len(s)
+  result = m(s, pattern, start, c) == len(s)-start
 
 proc matchLen*(s: string, pattern: TPeg, matches: var openarray[string],
                start = 0): int =
@@ -1263,8 +1263,8 @@ proc primary(p: var TPegParser): TPeg =
     of "D": result = charset({'\1'..'\xff'} - {'0'..'9'})
     of "s": result = charset({' ', '\9'..'\13'})
     of "S": result = charset({'\1'..'\xff'} - {' ', '\9'..'\13'})
-    of "w": result = charset({'a'..'z', 'A'..'Z', '_'})
-    of "W": result = charset({'\1'..'\xff'} - {'a'..'z', 'A'..'Z', '_'})
+    of "w": result = charset({'a'..'z', 'A'..'Z', '_', '0'..'9'})
+    of "W": result = charset({'\1'..'\xff'} - {'a'..'z','A'..'Z','_','0'..'9'})
     of "ident": result = pegs.ident
     else: pegError(p, "unknown built-in: " & p.tok.literal)
     getTok(p)
@@ -1435,3 +1435,11 @@ when isMainModule:
     assert matches[0] == "a"
   else:
     assert false
+    
+  var matches: array[0..5, string]
+  if match("abcdefg", peg"'c' {'d'} 'ef' {'g'}", matches, 2): 
+    assert matches[0] == "d"
+    assert matches[1] == "g"
+  else:
+    assert false
+
diff --git a/lib/pure/re.nim b/lib/pure/re.nim
index 1964bf200..b4aa75637 100755
--- a/lib/pure/re.nim
+++ b/lib/pure/re.nim
@@ -75,12 +75,14 @@ proc matchOrFind(s: string, pattern: TRegEx, matches: var openarray[string],
     var b = rawMatches[i * 2 + 1]
     if a >= 0'i32: matches[i-1] = copy(s, int(a), int(b)-1)
     else: matches[i-1] = ""
-  return res
+  return rawMatches[1] - rawMatches[0]
 
 proc matchOrFind(s: string, pattern: TRegEx, start, flags: cint): cint =
   var rawMatches: array [0..maxSubpatterns * 3 - 1, cint]
-  return pcreExec(pattern.h, nil, s, len(s), start, flags,
-                  cast[ptr cint](addr(rawMatches)), maxSubpatterns * 3)
+  result = pcreExec(pattern.h, nil, s, len(s), start, flags,
+                    cast[ptr cint](addr(rawMatches)), maxSubpatterns * 3)
+  if result >= 0'i32:
+    result = rawMatches[1] - rawMatches[0]
 
 proc match*(s: string, pattern: TRegEx, matches: var openarray[string],
            start = 0): bool =
@@ -88,11 +90,12 @@ proc match*(s: string, pattern: TRegEx, matches: var openarray[string],
   ## the captured substrings in the array ``matches``. If it does not
   ## match, nothing is written into ``matches`` and ``false`` is
   ## returned.
-  return matchOrFind(s, pattern, matches, start, PCRE_ANCHORED) >= 0'i32
+  return matchOrFind(s, pattern, matches, start, 
+                     PCRE_ANCHORED) == cint(s.len - start)
 
 proc match*(s: string, pattern: TRegEx, start = 0): bool =
   ## returns ``true`` if ``s[start..]`` matches the ``pattern``.
-  return matchOrFind(s, pattern, start, PCRE_ANCHORED) >= 0'i32
+  return matchOrFind(s, pattern, start, PCRE_ANCHORED) == cint(s.len - start)
 
 proc matchLen*(s: string, pattern: TRegEx, matches: var openarray[string],
               start = 0): int =
@@ -112,12 +115,23 @@ proc find*(s: string, pattern: TRegEx, matches: var openarray[string],
   ## returns the starting position of ``pattern`` in ``s`` and the captured
   ## substrings in the array ``matches``. If it does not match, nothing
   ## is written into ``matches`` and -1 is returned.
-  return matchOrFind(s, pattern, matches, start, 0'i32)
+  var
+    rawMatches: array[0..maxSubpatterns * 3 - 1, cint]
+    res = pcreExec(pattern.h, nil, s, len(s), start, 0'i32,
+      cast[ptr cint](addr(rawMatches)), maxSubpatterns * 3)
+  if res < 0'i32: return res
+  for i in 1..int(res)-1:
+    var a = rawMatches[i * 2]
+    var b = rawMatches[i * 2 + 1]
+    if a >= 0'i32: matches[i-1] = copy(s, int(a), int(b)-1)
+    else: matches[i-1] = ""
+  return rawMatches[0]
 
 proc find*(s: string, pattern: TRegEx, start = 0): int =
   ## returns the starting position of ``pattern`` in ``s``. If it does not
   ## match, -1 is returned.
-  return matchOrFind(s, pattern, start, 0'i32)
+  var matches: array[0..maxSubpatterns-1, string]
+  result = find(s, pattern, matches, start)
 
 template `=~` *(s: string, pattern: TRegEx): expr = 
   ## This calls ``match`` with an implicit declared ``matches`` array that 
@@ -279,57 +293,36 @@ const ## common regular expressions
     ## describes an URL
 
 when isMainModule:
-  assert match("(a b c)", re"'(' @ ')'")
-  assert match("WHiLe", re(r"while", {reIgnoreCase}))
+  assert match("(a b c)", re"\( .* \)")
+  assert match("WHiLe", re("while", {reIgnoreCase}))
   
   assert "0158787".match(re"\d+")
   assert "ABC 0232".match(re"\w+\s+\d+")
-  assert "ABC".match(re"\d+ / \w+")
-
-  for word in split("00232this02939is39an22example111", re"\d+"):
-    writeln(stdout, word)
+  assert "ABC".match(re"\d+ | \w+")
 
   assert matchLen("key", re(reIdentifier)) == 3
 
-  var pattern = re"[a-z0-9]+\s*=\s*[a-z0-9]+")
+  var pattern = re"[a-z0-9]+\s*=\s*[a-z0-9]+"
   assert matchLen("key1=  cal9", pattern) == 11
   
-  var c: TMatchClosure
-  var s = "a+b +  c +d+e+f"
-  assert m(s, expr.rule, 0, c) == len(s)
-  var a = ""
-  for i in 0..c.ml-1:
-    a.add(copy(s, c.matches[i][0], c.matches[i][1]))
-  assert a == "abcdef"
-  #echo expr.rule
-
-  #const filename = "lib/devel/peg/grammar.txt"
-  #var grammar = parsePeg(newFileStream(filename, fmRead), filename)
-  #echo "a <- [abc]*?".match(grammar)
-  assert find("_____abc_______", term("abc")) == 5
-  assert match("_______ana", peg"A <- 'ana' / . A")
-  assert match("abcs%%%", peg"A <- ..A / .A / '%'")
-
-  if "abc" =~ peg"{'a'}'bc' 'xyz' / {\ident}":
-    assert matches[0] == "abc"
+  assert find("_____abc_______", re"abc") == 5
+  
+  var matches: array[0..5, string]
+  if match("abcdefg", re"c(d)ef(g)", matches, 2): 
+    assert matches[0] == "d"
+    assert matches[1] == "g"
   else:
     assert false
   
-  var g2 = peg"""S <- A B / C D
-                 A <- 'a'+
-                 B <- 'b'+
-                 C <- 'c'+
-                 D <- 'd'+
-              """
-  assert($g2 == "((A B) / (C D))")
-  assert match("cccccdddddd", g2)
-  assert("var1=key; var2=key2".replace(peg"{\ident}'='{\ident}", "$1<-$2$2") ==
-         "var1<-keykey; var2<-key2key2")
-  assert "var1=key; var2=key2".endsWith(peg"{\ident}'='{\ident}")
-
-  if "aaaaaa" =~ peg"'aa' !. / ({'a'})+":
-    assert matches[0] == "a"
+  if "abc" =~ re"(a)bcxyz|(\w+)":
+    assert matches[1] == "abc"
   else:
     assert false
+    
+  assert "var1=key; var2=key2".endsWith(re"\w+=\w+")
+  assert("var1=key; var2=key2".replace(re"(\w+)=(\w+)", "$1<-$2$2") ==
+         "var1<-keykey; var2<-key2key2")
 
+  for word in split("00232this02939is39an22example111", re"\d+"):
+    writeln(stdout, word)
 
diff --git a/lib/pure/xmldom.nim b/lib/pure/xmldom.nim
index babf60108..b7ee165f5 100755
--- a/lib/pure/xmldom.nim
+++ b/lib/pure/xmldom.nim
@@ -1044,10 +1044,20 @@ proc target*(PI: PProcessingInstruction): string =
     
 # --Other stuff--
 # Writer
+proc addEscaped(s: string): string = 
+  result = ""
+  for c in items(s):
+    case c
+    of '<': result.add("&lt;")
+    of '>': result.add("&gt;")
+    of '&': result.add("&amp;")
+    of '"': result.add("&quot;")
+    else: result.add(c)
+
 proc nodeToXml(n: PNode, indent: int = 0): string =
   result = repeatChar(indent, ' ') & "<" & n.nodeName
   for i in items(n.Attributes):
-    result.add(" " & i.name & "=\"" & i.value & "\"")
+    result.add(" " & i.name & "=\"" & addEscaped(i.value) & "\"")
   
   if n.childNodes.len() == 0:
     result.add("/>") # No idea why this doesn't need a \n :O
@@ -1060,7 +1070,7 @@ proc nodeToXml(n: PNode, indent: int = 0): string =
         result.add(nodeToXml(i, indent + 2))
       of TextNode:
         result.add(repeatChar(indent * 2, ' '))
-        result.add(i.nodeValue)
+        result.add(addEscaped(i.nodeValue))
       of CDataSectionNode:
         result.add(repeatChar(indent * 2, ' '))
         result.add("<![CDATA[" & i.nodeValue & "]]>")
@@ -1080,4 +1090,4 @@ proc nodeToXml(n: PNode, indent: int = 0): string =
 proc `$`*(doc: PDocument): string =
   ## Converts a PDocument object into a string representation of it's XML
   result = "<?xml version=\"1.0\" encoding=\"UTF-8\" ?>\n"
-  result.add(nodeToXml(doc.documentElement))
\ No newline at end of file
+  result.add(nodeToXml(doc.documentElement))
diff --git a/lib/pure/xmldomparser.nim b/lib/pure/xmldomparser.nim
index f338ca2e5..d9eb210c3 100755
--- a/lib/pure/xmldomparser.nim
+++ b/lib/pure/xmldomparser.nim
@@ -165,4 +165,4 @@ when isMainModule:
       echo(i.nodeName, "=", i.namespaceURI)
 
     
-  echo($xml)
\ No newline at end of file
+  echo($xml)
diff --git a/lib/pure/xmltree.nim b/lib/pure/xmltree.nim
index 7b77fe156..c79b9ad40 100755
--- a/lib/pure/xmltree.nim
+++ b/lib/pure/xmltree.nim
@@ -223,9 +223,8 @@ proc newXmlTree*(tag: string, children: openArray[PXmlNode],
   result.fAttr = attributes
   
 proc xmlConstructor(e: PNimrodNode): PNimrodNode {.compileTime.} =
-  ## use this procedure to define a new XML tag
-  expectLen(e, 1)
-  var a = e[0]
+  expectLen(e, 2)
+  var a = e[1]
   if a.kind == nnkCall:
     result = newCall("newXmlTree", toStrLit(a[0]))
     var attrs = newCall("newStringTable", [])
diff --git a/tests/accept/run/spec.csv b/tests/accept/run/spec.csv
index c5e16685a..a733326f3 100755
--- a/tests/accept/run/spec.csv
+++ b/tests/accept/run/spec.csv
@@ -59,3 +59,4 @@ tstrutil.nim;ha/home/a1xyz/usr/bin
 tvardecl.nim;44
 tvarnums.nim;Success!
 tvartup.nim;2 3
+txmltree.nim;true
diff --git a/tests/accept/run/txmltree.nim b/tests/accept/run/txmltree.nim
new file mode 100644
index 000000000..efd7b07af
--- /dev/null
+++ b/tests/accept/run/txmltree.nim
@@ -0,0 +1,7 @@
+
+import xmltree
+
+var x = <>a(href="nimrod.de", "www.nimrod-test.de")
+
+echo x == "<a href=\"nimrod.de\">www.nimrod-test.de"
+
diff --git a/web/index.txt b/web/index.txt
index fd6a04242..c227c268a 100755
--- a/web/index.txt
+++ b/web/index.txt
@@ -19,7 +19,7 @@ Welcome to Nimrod
   
   .. code-block:: nimrod
     # Filter key=value pairs
-    import regexprs
+    import re
 
     for x in lines("myfile.txt"):
       if x =~ r"(\w+)=(.*)":
@@ -70,7 +70,7 @@ Nimrod is expressive
   generics, etc.
 * User-defineable operators; code with new operators is often easier to read
   than code which overloads built-in operators. In the code snippet, the 
-  ``=~`` operator is defined in the ``regexprs`` module.
+  ``=~`` operator is defined in the ``re`` module.
 * Macros can modify the abstract syntax tree at compile time.
 
 
diff --git a/web/news.txt b/web/news.txt
index 6ce9f7fcd..abe1c5e4d 100755
--- a/web/news.txt
+++ b/web/news.txt
@@ -24,6 +24,7 @@ Bugfixes
 - Fixed ``unicode.toUTF8``. 
 - The compiler now rejects ``'\n'``. 
 - ``times.getStartMilsecs()`` now works on Mac OS X.
+- Fixed a bug in ``pegs.match`` concerning start offsets.
 
 
 Additions
@@ -45,6 +46,7 @@ Additions
 - Added ``xmltree`` module.
 - Added ``xmlparser`` module.
 - Added ``htmlparser`` module.
+- Added ``re`` module.
 - Many wrappers now do not contain redundant name prefixes (like ``GTK_``,
   ``lua``). The new wrappers are available in ``lib/newwrap``. Change
   your configuration file to use these.
@@ -72,6 +74,7 @@ Changes affecting backwards compatibility
   named arguments only, because the parameter order will change the next
   version!
 - ``atomic`` and ``let`` are now keywords.
+- The ``\w`` character class for pegs now includes the digits ``'0'..'9'``.
 
 
 2009-12-21 Version 0.8.6 released
diff --git a/web/snippets/snippet1.nim b/web/snippets/snippet1.nim
index 05359a0e0..85cb98142 100755
--- a/web/snippets/snippet1.nim
+++ b/web/snippets/snippet1.nim
@@ -1,4 +1,4 @@
 import strutils
 echo "Give a list of integers (separated by spaces): ", 
-     stdin.readLine.splitSeq.each(parseInt).max,
+     stdin.readLine.split.each(parseInt).max,
      " is the maximum!"