diff options
Diffstat (limited to 'lib')
-rwxr-xr-x | lib/pure/pegs.nim | 16 | ||||
-rwxr-xr-x | lib/pure/re.nim | 85 | ||||
-rwxr-xr-x | lib/pure/xmldom.nim | 16 | ||||
-rwxr-xr-x | lib/pure/xmldomparser.nim | 2 | ||||
-rwxr-xr-x | lib/pure/xmltree.nim | 5 |
5 files changed, 67 insertions, 57 deletions
diff --git a/lib/pure/pegs.nim b/lib/pure/pegs.nim index 2cfae3201..201f13111 100755 --- a/lib/pure/pegs.nim +++ b/lib/pure/pegs.nim @@ -604,7 +604,7 @@ proc match*(s: string, pattern: TPeg, matches: var openarray[string], ## match, nothing is written into ``matches`` and ``false`` is ## returned. var c: TMatchClosure - result = m(s, pattern, start, c) == len(s) + result = m(s, pattern, start, c) == len(s) -start if result: for i in 0..c.ml-1: matches[i] = copy(s, c.matches[i][0], c.matches[i][1]) @@ -612,7 +612,7 @@ proc match*(s: string, pattern: TPeg, matches: var openarray[string], proc match*(s: string, pattern: TPeg, start = 0): bool = ## returns ``true`` if ``s`` matches the ``pattern`` beginning from ``start``. var c: TMatchClosure - result = m(s, pattern, start, c) == len(s) + result = m(s, pattern, start, c) == len(s)-start proc matchLen*(s: string, pattern: TPeg, matches: var openarray[string], start = 0): int = @@ -1263,8 +1263,8 @@ proc primary(p: var TPegParser): TPeg = of "D": result = charset({'\1'..'\xff'} - {'0'..'9'}) of "s": result = charset({' ', '\9'..'\13'}) of "S": result = charset({'\1'..'\xff'} - {' ', '\9'..'\13'}) - of "w": result = charset({'a'..'z', 'A'..'Z', '_'}) - of "W": result = charset({'\1'..'\xff'} - {'a'..'z', 'A'..'Z', '_'}) + of "w": result = charset({'a'..'z', 'A'..'Z', '_', '0'..'9'}) + of "W": result = charset({'\1'..'\xff'} - {'a'..'z','A'..'Z','_','0'..'9'}) of "ident": result = pegs.ident else: pegError(p, "unknown built-in: " & p.tok.literal) getTok(p) @@ -1435,3 +1435,11 @@ when isMainModule: assert matches[0] == "a" else: assert false + + var matches: array[0..5, string] + if match("abcdefg", peg"'c' {'d'} 'ef' {'g'}", matches, 2): + assert matches[0] == "d" + assert matches[1] == "g" + else: + assert false + diff --git a/lib/pure/re.nim b/lib/pure/re.nim index 1964bf200..b4aa75637 100755 --- a/lib/pure/re.nim +++ b/lib/pure/re.nim @@ -75,12 +75,14 @@ proc matchOrFind(s: string, pattern: TRegEx, matches: var 
openarray[string], var b = rawMatches[i * 2 + 1] if a >= 0'i32: matches[i-1] = copy(s, int(a), int(b)-1) else: matches[i-1] = "" - return res + return rawMatches[1] - rawMatches[0] proc matchOrFind(s: string, pattern: TRegEx, start, flags: cint): cint = var rawMatches: array [0..maxSubpatterns * 3 - 1, cint] - return pcreExec(pattern.h, nil, s, len(s), start, flags, - cast[ptr cint](addr(rawMatches)), maxSubpatterns * 3) + result = pcreExec(pattern.h, nil, s, len(s), start, flags, + cast[ptr cint](addr(rawMatches)), maxSubpatterns * 3) + if result >= 0'i32: + result = rawMatches[1] - rawMatches[0] proc match*(s: string, pattern: TRegEx, matches: var openarray[string], start = 0): bool = @@ -88,11 +90,12 @@ proc match*(s: string, pattern: TRegEx, matches: var openarray[string], ## the captured substrings in the array ``matches``. If it does not ## match, nothing is written into ``matches`` and ``false`` is ## returned. - return matchOrFind(s, pattern, matches, start, PCRE_ANCHORED) >= 0'i32 + return matchOrFind(s, pattern, matches, start, + PCRE_ANCHORED) == cint(s.len - start) proc match*(s: string, pattern: TRegEx, start = 0): bool = ## returns ``true`` if ``s[start..]`` matches the ``pattern``. - return matchOrFind(s, pattern, start, PCRE_ANCHORED) >= 0'i32 + return matchOrFind(s, pattern, start, PCRE_ANCHORED) == cint(s.len - start) proc matchLen*(s: string, pattern: TRegEx, matches: var openarray[string], start = 0): int = @@ -112,12 +115,23 @@ proc find*(s: string, pattern: TRegEx, matches: var openarray[string], ## returns the starting position of ``pattern`` in ``s`` and the captured ## substrings in the array ``matches``. If it does not match, nothing ## is written into ``matches`` and -1 is returned. 
- return matchOrFind(s, pattern, matches, start, 0'i32) + var + rawMatches: array[0..maxSubpatterns * 3 - 1, cint] + res = pcreExec(pattern.h, nil, s, len(s), start, 0'i32, + cast[ptr cint](addr(rawMatches)), maxSubpatterns * 3) + if res < 0'i32: return res + for i in 1..int(res)-1: + var a = rawMatches[i * 2] + var b = rawMatches[i * 2 + 1] + if a >= 0'i32: matches[i-1] = copy(s, int(a), int(b)-1) + else: matches[i-1] = "" + return rawMatches[0] proc find*(s: string, pattern: TRegEx, start = 0): int = ## returns the starting position of ``pattern`` in ``s``. If it does not ## match, -1 is returned. - return matchOrFind(s, pattern, start, 0'i32) + var matches: array[0..maxSubpatterns-1, string] + result = find(s, pattern, matches, start) template `=~` *(s: string, pattern: TRegEx): expr = ## This calls ``match`` with an implicit declared ``matches`` array that @@ -279,57 +293,36 @@ const ## common regular expressions ## describes an URL when isMainModule: - assert match("(a b c)", re"'(' @ ')'") - assert match("WHiLe", re(r"while", {reIgnoreCase})) + assert match("(a b c)", re"\( .* \)") + assert match("WHiLe", re("while", {reIgnoreCase})) assert "0158787".match(re"\d+") assert "ABC 0232".match(re"\w+\s+\d+") - assert "ABC".match(re"\d+ / \w+") - - for word in split("00232this02939is39an22example111", re"\d+"): - writeln(stdout, word) + assert "ABC".match(re"\d+ | \w+") assert matchLen("key", re(reIdentifier)) == 3 - var pattern = re"[a-z0-9]+\s*=\s*[a-z0-9]+") + var pattern = re"[a-z0-9]+\s*=\s*[a-z0-9]+" assert matchLen("key1= cal9", pattern) == 11 - var c: TMatchClosure - var s = "a+b + c +d+e+f" - assert m(s, expr.rule, 0, c) == len(s) - var a = "" - for i in 0..c.ml-1: - a.add(copy(s, c.matches[i][0], c.matches[i][1])) - assert a == "abcdef" - #echo expr.rule - - #const filename = "lib/devel/peg/grammar.txt" - #var grammar = parsePeg(newFileStream(filename, fmRead), filename) - #echo "a <- [abc]*?".match(grammar) - assert find("_____abc_______", term("abc")) 
== 5 - assert match("_______ana", peg"A <- 'ana' / . A") - assert match("abcs%%%", peg"A <- ..A / .A / '%'") - - if "abc" =~ peg"{'a'}'bc' 'xyz' / {\ident}": - assert matches[0] == "abc" + assert find("_____abc_______", re"abc") == 5 + + var matches: array[0..5, string] + if match("abcdefg", re"c(d)ef(g)", matches, 2): + assert matches[0] == "d" + assert matches[1] == "g" else: assert false - var g2 = peg"""S <- A B / C D - A <- 'a'+ - B <- 'b'+ - C <- 'c'+ - D <- 'd'+ - """ - assert($g2 == "((A B) / (C D))") - assert match("cccccdddddd", g2) - assert("var1=key; var2=key2".replace(peg"{\ident}'='{\ident}", "$1<-$2$2") == - "var1<-keykey; var2<-key2key2") - assert "var1=key; var2=key2".endsWith(peg"{\ident}'='{\ident}") - - if "aaaaaa" =~ peg"'aa' !. / ({'a'})+": - assert matches[0] == "a" + if "abc" =~ re"(a)bcxyz|(\w+)": + assert matches[1] == "abc" else: assert false + + assert "var1=key; var2=key2".endsWith(re"\w+=\w+") + assert("var1=key; var2=key2".replace(re"(\w+)=(\w+)", "$1<-$2$2") == + "var1<-keykey; var2<-key2key2") + for word in split("00232this02939is39an22example111", re"\d+"): + writeln(stdout, word) diff --git a/lib/pure/xmldom.nim b/lib/pure/xmldom.nim index babf60108..b7ee165f5 100755 --- a/lib/pure/xmldom.nim +++ b/lib/pure/xmldom.nim @@ -1044,10 +1044,20 @@ proc target*(PI: PProcessingInstruction): string = # --Other stuff-- # Writer +proc addEscaped(s: string): string = + result = "" + for c in items(s): + case c + of '<': result.add("&lt;") + of '>': result.add("&gt;") + of '&': result.add("&amp;") + of '"': result.add("&quot;") + else: result.add(c) + proc nodeToXml(n: PNode, indent: int = 0): string = result = repeatChar(indent, ' ') & "<" & n.nodeName for i in items(n.Attributes): - result.add(" " & i.name & "=\"" & i.value & "\"") + result.add(" " & i.name & "=\"" & addEscaped(i.value) & "\"") if n.childNodes.len() == 0: result.add("/>") # No idea why this doesn't need a \n :O @@ -1060,7 +1070,7 @@ proc nodeToXml(n: PNode, indent: int = 0): string = 
result.add(nodeToXml(i, indent + 2)) of TextNode: result.add(repeatChar(indent * 2, ' ')) - result.add(i.nodeValue) + result.add(addEscaped(i.nodeValue)) of CDataSectionNode: result.add(repeatChar(indent * 2, ' ')) result.add("<![CDATA[" & i.nodeValue & "]]>") @@ -1080,4 +1090,4 @@ proc nodeToXml(n: PNode, indent: int = 0): string = proc `$`*(doc: PDocument): string = ## Converts a PDocument object into a string representation of it's XML result = "<?xml version=\"1.0\" encoding=\"UTF-8\" ?>\n" - result.add(nodeToXml(doc.documentElement)) \ No newline at end of file + result.add(nodeToXml(doc.documentElement)) diff --git a/lib/pure/xmldomparser.nim b/lib/pure/xmldomparser.nim index f338ca2e5..d9eb210c3 100755 --- a/lib/pure/xmldomparser.nim +++ b/lib/pure/xmldomparser.nim @@ -165,4 +165,4 @@ when isMainModule: echo(i.nodeName, "=", i.namespaceURI) - echo($xml) \ No newline at end of file + echo($xml) diff --git a/lib/pure/xmltree.nim b/lib/pure/xmltree.nim index 7b77fe156..c79b9ad40 100755 --- a/lib/pure/xmltree.nim +++ b/lib/pure/xmltree.nim @@ -223,9 +223,8 @@ proc newXmlTree*(tag: string, children: openArray[PXmlNode], result.fAttr = attributes proc xmlConstructor(e: PNimrodNode): PNimrodNode {.compileTime.} = - ## use this procedure to define a new XML tag - expectLen(e, 1) - var a = e[0] + expectLen(e, 2) + var a = e[1] if a.kind == nnkCall: result = newCall("newXmlTree", toStrLit(a[0])) var attrs = newCall("newStringTable", []) |