diff options
-rwxr-xr-x | config/nimrod.cfg | 2 | ||||
-rwxr-xr-x | doc/lib.txt | 6 | ||||
-rwxr-xr-x | doc/pegdocs.txt | 4 | ||||
-rwxr-xr-x | lib/pure/pegs.nim | 16 | ||||
-rwxr-xr-x | lib/pure/re.nim | 85 | ||||
-rwxr-xr-x | lib/pure/xmldom.nim | 16 | ||||
-rwxr-xr-x | lib/pure/xmldomparser.nim | 2 | ||||
-rwxr-xr-x | lib/pure/xmltree.nim | 5 | ||||
-rwxr-xr-x | tests/accept/run/spec.csv | 1 | ||||
-rw-r--r-- | tests/accept/run/txmltree.nim | 7 | ||||
-rwxr-xr-x | web/index.txt | 4 | ||||
-rwxr-xr-x | web/news.txt | 3 | ||||
-rwxr-xr-x | web/snippets/snippet1.nim | 2 |
13 files changed, 89 insertions, 64 deletions
diff --git a/config/nimrod.cfg b/config/nimrod.cfg index 80d7a548d..7043f71fc 100755 --- a/config/nimrod.cfg +++ b/config/nimrod.cfg @@ -1,5 +1,5 @@ # Configuration file for the Nimrod Compiler. -# (c) 2009 Andreas Rumpf +# (c) 2010 Andreas Rumpf # Feel free to edit the default values as you need. diff --git a/doc/lib.txt b/doc/lib.txt index b94ac987e..d2b327df7 100755 --- a/doc/lib.txt +++ b/doc/lib.txt @@ -55,7 +55,7 @@ String handling * `unicode <unicode.html>`_ This module provides support to handle the Unicode UTF-8 encoding. -* `regexprs <regexprs.html>`_ +* `re <re.html>`_ This module contains procedures and operators for handling regular expressions. Consider using `pegs` instead. @@ -229,6 +229,10 @@ Database support * `db_mysql <db_mysql.html>`_ A higher level mySQL database wrapper. The same interface is implemented for other databases too. + +* `db_sqlite <db_sqlite.html>`_ + A higher level SQLite database wrapper. The same interface is implemented + for other databases too. diff --git a/doc/pegdocs.txt b/doc/pegdocs.txt index 27eb19747..87b4e25bc 100755 --- a/doc/pegdocs.txt +++ b/doc/pegdocs.txt @@ -86,8 +86,8 @@ macro meaning ``\s`` any whitespace character: ``[ \9-\13]`` ``\S`` any character that is not a whitespace character: ``[^ \9-\13]`` -``\w`` any "word" character: ``[a-zA-Z_]`` -``\W`` any "non-word" character: ``[^a-zA-Z_]`` +``\w`` any "word" character: ``[a-zA-Z0-9_]`` +``\W`` any "non-word" character: ``[^a-zA-Z0-9_]`` ``\n`` any newline combination: ``\10 / \13\10 / \13`` ``\i`` ignore case for matching; use this at the start of the PEG ``\y`` ignore style for matching; use this at the start of the PEG diff --git a/lib/pure/pegs.nim b/lib/pure/pegs.nim index 2cfae3201..201f13111 100755 --- a/lib/pure/pegs.nim +++ b/lib/pure/pegs.nim @@ -604,7 +604,7 @@ proc match*(s: string, pattern: TPeg, matches: var openarray[string], ## match, nothing is written into ``matches`` and ``false`` is ## returned. 
var c: TMatchClosure - result = m(s, pattern, start, c) == len(s) + result = m(s, pattern, start, c) == len(s) -start if result: for i in 0..c.ml-1: matches[i] = copy(s, c.matches[i][0], c.matches[i][1]) @@ -612,7 +612,7 @@ proc match*(s: string, pattern: TPeg, matches: var openarray[string], proc match*(s: string, pattern: TPeg, start = 0): bool = ## returns ``true`` if ``s`` matches the ``pattern`` beginning from ``start``. var c: TMatchClosure - result = m(s, pattern, start, c) == len(s) + result = m(s, pattern, start, c) == len(s)-start proc matchLen*(s: string, pattern: TPeg, matches: var openarray[string], start = 0): int = @@ -1263,8 +1263,8 @@ proc primary(p: var TPegParser): TPeg = of "D": result = charset({'\1'..'\xff'} - {'0'..'9'}) of "s": result = charset({' ', '\9'..'\13'}) of "S": result = charset({'\1'..'\xff'} - {' ', '\9'..'\13'}) - of "w": result = charset({'a'..'z', 'A'..'Z', '_'}) - of "W": result = charset({'\1'..'\xff'} - {'a'..'z', 'A'..'Z', '_'}) + of "w": result = charset({'a'..'z', 'A'..'Z', '_', '0'..'9'}) + of "W": result = charset({'\1'..'\xff'} - {'a'..'z','A'..'Z','_','0'..'9'}) of "ident": result = pegs.ident else: pegError(p, "unknown built-in: " & p.tok.literal) getTok(p) @@ -1435,3 +1435,11 @@ when isMainModule: assert matches[0] == "a" else: assert false + + var matches: array[0..5, string] + if match("abcdefg", peg"'c' {'d'} 'ef' {'g'}", matches, 2): + assert matches[0] == "d" + assert matches[1] == "g" + else: + assert false + diff --git a/lib/pure/re.nim b/lib/pure/re.nim index 1964bf200..b4aa75637 100755 --- a/lib/pure/re.nim +++ b/lib/pure/re.nim @@ -75,12 +75,14 @@ proc matchOrFind(s: string, pattern: TRegEx, matches: var openarray[string], var b = rawMatches[i * 2 + 1] if a >= 0'i32: matches[i-1] = copy(s, int(a), int(b)-1) else: matches[i-1] = "" - return res + return rawMatches[1] - rawMatches[0] proc matchOrFind(s: string, pattern: TRegEx, start, flags: cint): cint = var rawMatches: array [0..maxSubpatterns * 3 - 1, 
cint] - return pcreExec(pattern.h, nil, s, len(s), start, flags, - cast[ptr cint](addr(rawMatches)), maxSubpatterns * 3) + result = pcreExec(pattern.h, nil, s, len(s), start, flags, + cast[ptr cint](addr(rawMatches)), maxSubpatterns * 3) + if result >= 0'i32: + result = rawMatches[1] - rawMatches[0] proc match*(s: string, pattern: TRegEx, matches: var openarray[string], start = 0): bool = @@ -88,11 +90,12 @@ proc match*(s: string, pattern: TRegEx, matches: var openarray[string], ## the captured substrings in the array ``matches``. If it does not ## match, nothing is written into ``matches`` and ``false`` is ## returned. - return matchOrFind(s, pattern, matches, start, PCRE_ANCHORED) >= 0'i32 + return matchOrFind(s, pattern, matches, start, + PCRE_ANCHORED) == cint(s.len - start) proc match*(s: string, pattern: TRegEx, start = 0): bool = ## returns ``true`` if ``s[start..]`` matches the ``pattern``. - return matchOrFind(s, pattern, start, PCRE_ANCHORED) >= 0'i32 + return matchOrFind(s, pattern, start, PCRE_ANCHORED) == cint(s.len - start) proc matchLen*(s: string, pattern: TRegEx, matches: var openarray[string], start = 0): int = @@ -112,12 +115,23 @@ proc find*(s: string, pattern: TRegEx, matches: var openarray[string], ## returns the starting position of ``pattern`` in ``s`` and the captured ## substrings in the array ``matches``. If it does not match, nothing ## is written into ``matches`` and -1 is returned. 
- return matchOrFind(s, pattern, matches, start, 0'i32) + var + rawMatches: array[0..maxSubpatterns * 3 - 1, cint] + res = pcreExec(pattern.h, nil, s, len(s), start, 0'i32, + cast[ptr cint](addr(rawMatches)), maxSubpatterns * 3) + if res < 0'i32: return res + for i in 1..int(res)-1: + var a = rawMatches[i * 2] + var b = rawMatches[i * 2 + 1] + if a >= 0'i32: matches[i-1] = copy(s, int(a), int(b)-1) + else: matches[i-1] = "" + return rawMatches[0] proc find*(s: string, pattern: TRegEx, start = 0): int = ## returns the starting position of ``pattern`` in ``s``. If it does not ## match, -1 is returned. - return matchOrFind(s, pattern, start, 0'i32) + var matches: array[0..maxSubpatterns-1, string] + result = find(s, pattern, matches, start) template `=~` *(s: string, pattern: TRegEx): expr = ## This calls ``match`` with an implicit declared ``matches`` array that @@ -279,57 +293,36 @@ const ## common regular expressions ## describes an URL when isMainModule: - assert match("(a b c)", re"'(' @ ')'") - assert match("WHiLe", re(r"while", {reIgnoreCase})) + assert match("(a b c)", re"\( .* \)") + assert match("WHiLe", re("while", {reIgnoreCase})) assert "0158787".match(re"\d+") assert "ABC 0232".match(re"\w+\s+\d+") - assert "ABC".match(re"\d+ / \w+") - - for word in split("00232this02939is39an22example111", re"\d+"): - writeln(stdout, word) + assert "ABC".match(re"\d+ | \w+") assert matchLen("key", re(reIdentifier)) == 3 - var pattern = re"[a-z0-9]+\s*=\s*[a-z0-9]+") + var pattern = re"[a-z0-9]+\s*=\s*[a-z0-9]+" assert matchLen("key1= cal9", pattern) == 11 - var c: TMatchClosure - var s = "a+b + c +d+e+f" - assert m(s, expr.rule, 0, c) == len(s) - var a = "" - for i in 0..c.ml-1: - a.add(copy(s, c.matches[i][0], c.matches[i][1])) - assert a == "abcdef" - #echo expr.rule - - #const filename = "lib/devel/peg/grammar.txt" - #var grammar = parsePeg(newFileStream(filename, fmRead), filename) - #echo "a <- [abc]*?".match(grammar) - assert find("_____abc_______", term("abc")) 
== 5 - assert match("_______ana", peg"A <- 'ana' / . A") - assert match("abcs%%%", peg"A <- ..A / .A / '%'") - - if "abc" =~ peg"{'a'}'bc' 'xyz' / {\ident}": - assert matches[0] == "abc" + assert find("_____abc_______", re"abc") == 5 + + var matches: array[0..5, string] + if match("abcdefg", re"c(d)ef(g)", matches, 2): + assert matches[0] == "d" + assert matches[1] == "g" else: assert false - var g2 = peg"""S <- A B / C D - A <- 'a'+ - B <- 'b'+ - C <- 'c'+ - D <- 'd'+ - """ - assert($g2 == "((A B) / (C D))") - assert match("cccccdddddd", g2) - assert("var1=key; var2=key2".replace(peg"{\ident}'='{\ident}", "$1<-$2$2") == - "var1<-keykey; var2<-key2key2") - assert "var1=key; var2=key2".endsWith(peg"{\ident}'='{\ident}") - - if "aaaaaa" =~ peg"'aa' !. / ({'a'})+": - assert matches[0] == "a" + if "abc" =~ re"(a)bcxyz|(\w+)": + assert matches[1] == "abc" else: assert false + + assert "var1=key; var2=key2".endsWith(re"\w+=\w+") + assert("var1=key; var2=key2".replace(re"(\w+)=(\w+)", "$1<-$2$2") == + "var1<-keykey; var2<-key2key2") + for word in split("00232this02939is39an22example111", re"\d+"): + writeln(stdout, word) diff --git a/lib/pure/xmldom.nim b/lib/pure/xmldom.nim index babf60108..b7ee165f5 100755 --- a/lib/pure/xmldom.nim +++ b/lib/pure/xmldom.nim @@ -1044,10 +1044,20 @@ proc target*(PI: PProcessingInstruction): string = # --Other stuff-- # Writer +proc addEscaped(s: string): string = + result = "" + for c in items(s): + case c + of '<': result.add("&lt;") + of '>': result.add("&gt;") + of '&': result.add("&amp;") + of '"': result.add("&quot;") + else: result.add(c) + proc nodeToXml(n: PNode, indent: int = 0): string = result = repeatChar(indent, ' ') & "<" & n.nodeName for i in items(n.Attributes): - result.add(" " & i.name & "=\"" & i.value & "\"") + result.add(" " & i.name & "=\"" & addEscaped(i.value) & "\"") if n.childNodes.len() == 0: result.add("/>") # No idea why this doesn't need a \n :O @@ -1060,7 +1070,7 @@ proc nodeToXml(n: PNode, indent: int = 0): string = 
result.add(nodeToXml(i, indent + 2)) of TextNode: result.add(repeatChar(indent * 2, ' ')) - result.add(i.nodeValue) + result.add(addEscaped(i.nodeValue)) of CDataSectionNode: result.add(repeatChar(indent * 2, ' ')) result.add("<![CDATA[" & i.nodeValue & "]]>") @@ -1080,4 +1090,4 @@ proc nodeToXml(n: PNode, indent: int = 0): string = proc `$`*(doc: PDocument): string = ## Converts a PDocument object into a string representation of it's XML result = "<?xml version=\"1.0\" encoding=\"UTF-8\" ?>\n" - result.add(nodeToXml(doc.documentElement)) \ No newline at end of file + result.add(nodeToXml(doc.documentElement)) diff --git a/lib/pure/xmldomparser.nim b/lib/pure/xmldomparser.nim index f338ca2e5..d9eb210c3 100755 --- a/lib/pure/xmldomparser.nim +++ b/lib/pure/xmldomparser.nim @@ -165,4 +165,4 @@ when isMainModule: echo(i.nodeName, "=", i.namespaceURI) - echo($xml) \ No newline at end of file + echo($xml) diff --git a/lib/pure/xmltree.nim b/lib/pure/xmltree.nim index 7b77fe156..c79b9ad40 100755 --- a/lib/pure/xmltree.nim +++ b/lib/pure/xmltree.nim @@ -223,9 +223,8 @@ proc newXmlTree*(tag: string, children: openArray[PXmlNode], result.fAttr = attributes proc xmlConstructor(e: PNimrodNode): PNimrodNode {.compileTime.} = - ## use this procedure to define a new XML tag - expectLen(e, 1) - var a = e[0] + expectLen(e, 2) + var a = e[1] if a.kind == nnkCall: result = newCall("newXmlTree", toStrLit(a[0])) var attrs = newCall("newStringTable", []) diff --git a/tests/accept/run/spec.csv b/tests/accept/run/spec.csv index c5e16685a..a733326f3 100755 --- a/tests/accept/run/spec.csv +++ b/tests/accept/run/spec.csv @@ -59,3 +59,4 @@ tstrutil.nim;ha/home/a1xyz/usr/bin tvardecl.nim;44 tvarnums.nim;Success! 
tvartup.nim;2 3 +txmltree.nim;true diff --git a/tests/accept/run/txmltree.nim b/tests/accept/run/txmltree.nim new file mode 100644 index 000000000..efd7b07af --- /dev/null +++ b/tests/accept/run/txmltree.nim @@ -0,0 +1,7 @@ + +import xmltree + +var x = <>a(href="nimrod.de", "www.nimrod-test.de") + +echo x == "<a href=\"nimrod.de\">www.nimrod-test.de" + diff --git a/web/index.txt b/web/index.txt index fd6a04242..c227c268a 100755 --- a/web/index.txt +++ b/web/index.txt @@ -19,7 +19,7 @@ Welcome to Nimrod .. code-block:: nimrod # Filter key=value pairs - import regexprs + import re for x in lines("myfile.txt"): if x =~ r"(\w+)=(.*)": @@ -70,7 +70,7 @@ Nimrod is expressive generics, etc. * User-defineable operators; code with new operators is often easier to read than code which overloads built-in operators. In the code snippet, the - ``=~`` operator is defined in the ``regexprs`` module. + ``=~`` operator is defined in the ``re`` module. * Macros can modify the abstract syntax tree at compile time. diff --git a/web/news.txt b/web/news.txt index 6ce9f7fcd..abe1c5e4d 100755 --- a/web/news.txt +++ b/web/news.txt @@ -24,6 +24,7 @@ Bugfixes - Fixed ``unicode.toUTF8``. - The compiler now rejects ``'\n'``. - ``times.getStartMilsecs()`` now works on Mac OS X. +- Fixed a bug in ``pegs.match`` concerning start offsets. Additions @@ -45,6 +46,7 @@ Additions - Added ``xmltree`` module. - Added ``xmlparser`` module. - Added ``htmlparser`` module. +- Added ``re`` module. - Many wrappers now do not contain redundant name prefixes (like ``GTK_``, ``lua``). The new wrappers are available in ``lib/newwrap``. Change your configuration file to use these. @@ -72,6 +74,7 @@ Changes affecting backwards compatibility named arguments only, because the parameter order will change the next version! - ``atomic`` and ``let`` are now keywords. +- The ``\w`` character class for pegs now includes the digits ``'0'..'9'``. 
2009-12-21 Version 0.8.6 released diff --git a/web/snippets/snippet1.nim b/web/snippets/snippet1.nim index 05359a0e0..85cb98142 100755 --- a/web/snippets/snippet1.nim +++ b/web/snippets/snippet1.nim @@ -1,4 +1,4 @@ import strutils echo "Give a list of integers (separated by spaces): ", - stdin.readLine.splitSeq.each(parseInt).max, + stdin.readLine.split.each(parseInt).max, " is the maximum!" |