Merge pull request #2818 from flaviut/add-nre

Add nre to stdlib
author: Andreas Rumpf <rumpf_a@web.de> 2015-06-12 00:13:19 +0200
committer: Andreas Rumpf <rumpf_a@web.de> 2015-06-12 00:13:19 +0200
commit: d31fe7666a949afced44e538d9a2dda3e29d4a28 (patch)
tree: 8d2ceac7aacbaaf842a1c3c192be5763610aef78 /tests
parent: e7bb37d12c11bae7ace5b876da2a50f6ffa5f8de (diff)
parent: 31514550d38132cdd64be723a7a25819dbf31f97 (diff)
download: Nim-d31fe7666a949afced44e538d9a2dda3e29d4a28.tar.gz
10 files changed, 245 insertions, 0 deletions
diff --git a/tests/stdlib/nre/captures.nim b/tests/stdlib/nre/captures.nim
new file mode 100644
index 000000000..4f3f15444
--- /dev/null
+++ b/tests/stdlib/nre/captures.nim
@@ -0,0 +1,59 @@
+import unittest, optional_nonstrict
+include nre
+
+suite "captures":
+  test "map capture names to numbers":
+    check(getNameToNumberTable(re("(?<v1>1(?<v2>2(?<v3>3))(?'v4'4))()")) == 
+      { "v1" : 0, "v2" : 1, "v3" : 2, "v4" : 3 }.toTable())
+
+  test "capture bounds are correct":
+    let ex1 = re("([0-9])")
+    check("1 23".find(ex1).matchBounds == 0 .. 0)
+    check("1 23".find(ex1).captureBounds[0].get == 0 .. 0)
+    check("1 23".find(ex1, 1).matchBounds == 2 .. 2)
+    check("1 23".find(ex1, 3).matchBounds == 3 .. 3)
+
+    let ex2 = re("()()()()()()()()()()([0-9])")
+    check("824".find(ex2).captureBounds[0].get == 0 .. -1)
+    check("824".find(ex2).captureBounds[10].get == 0 .. 0)
+
+    let ex3 = re("([0-9]+)")
+    check("824".find(ex3).captureBounds[0].get == 0 .. 2)
+
+  test "named captures":
+    let ex1 = "foobar".find(re("(?<foo>foo)(?<bar>bar)"))
+    check(ex1.captures["foo"] == "foo")
+    check(ex1.captures["bar"] == "bar")
+
+    let ex2 = "foo".find(re("(?<foo>foo)(?<bar>bar)?"))
+    check(ex2.captures["foo"] == "foo")
+    check(ex2.captures["bar"] == nil)
+
+  test "named capture bounds":
+    let ex1 = "foo".find(re("(?<foo>foo)(?<bar>bar)?"))
+    check(ex1.captureBounds["foo"] == some(0..2))
+    check(ex1.captureBounds["bar"] == none(Slice[int]))
+
+  test "capture count":
+    let ex1 = re("(?<foo>foo)(?<bar>bar)?")
+    check(ex1.captureCount == 2)
+    check(ex1.captureNameId == {"foo" : 0, "bar" : 1}.toTable())
+
+  test "named capture table":
+    let ex1 = "foo".find(re("(?<foo>foo)(?<bar>bar)?"))
+    check(ex1.captures.toTable == {"foo" : "foo", "bar" : nil}.toTable())
+    check(ex1.captureBounds.toTable == {"foo" : some(0..2), "bar" : none(Slice[int])}.toTable())
+    check(ex1.captures.toTable("") == {"foo" : "foo", "bar" : ""}.toTable())
+
+    let ex2 = "foobar".find(re("(?<foo>foo)(?<bar>bar)?"))
+    check(ex2.captures.toTable == {"foo" : "foo", "bar" : "bar"}.toTable())
+
+  test "capture sequence":
+    let ex1 = "foo".find(re("(?<foo>foo)(?<bar>bar)?"))
+    check(ex1.captures.toSeq == @["foo", nil])
+    check(ex1.captureBounds.toSeq == @[some(0..2), none(Slice[int])])
+    check(ex1.captures.toSeq("") == @["foo", ""])
+
+    let ex2 = "foobar".find(re("(?<foo>foo)(?<bar>bar)?"))
+    check(ex2.captures.toSeq == @["foo", "bar"])
+
diff --git a/tests/stdlib/nre/escape.nim b/tests/stdlib/nre/escape.nim
new file mode 100644
index 000000000..db5e8a001
--- /dev/null
+++ b/tests/stdlib/nre/escape.nim
@@ -0,0 +1,7 @@
+import nre, unittest
+
+suite "escape strings":
+  test "escape strings":
+    check("123".escapeRe() == "123")
+    check("[]".escapeRe() == r"\[\]")
+    check("()".escapeRe() == r"\(\)")
diff --git a/tests/stdlib/nre/find.nim b/tests/stdlib/nre/find.nim
new file mode 100644
index 000000000..05bfb848a
--- /dev/null
+++ b/tests/stdlib/nre/find.nim
@@ -0,0 +1,25 @@
+import unittest, sequtils, nre, optional_nonstrict
+
+suite "find":
+  test "find text":
+    check("3213a".find(re"[a-z]").match == "a")
+    check(toSeq(findIter("1 2 3 4 5 6 7 8 ", re" ")).map(
+      proc (a: RegexMatch): string = a.match
+    ) == @[" ", " ", " ", " ", " ", " ", " ", " "])
+
+  test "find bounds":
+    check(toSeq(findIter("1 2 3 4 5 ", re" ")).map(
+      proc (a: RegexMatch): Slice[int] = a.matchBounds
+    ) == @[1..1, 3..3, 5..5, 7..7, 9..9])
+
+  test "overlapping find":
+    check("222".findAll(re"22") == @["22"])
+    check("2222".findAll(re"22") == @["22", "22"])
+
+  test "len 0 find":
+    check("".findAll(re"\ ") == newSeq[string]())
+    check("".findAll(re"") == @[""])
+    check("abc".findAll(re"") == @["", "", "", ""])
+    check("word word".findAll(re"\b") == @["", "", "", ""])
+    check("word\r\lword".findAll(re"(*ANYCRLF)(?m)$") == @["", ""])
+    check("слово слово".findAll(re"(*U)\b") == @["", "", "", ""])
diff --git a/tests/stdlib/nre/init.nim b/tests/stdlib/nre/init.nim
new file mode 100644
index 000000000..1a1470842
--- /dev/null
+++ b/tests/stdlib/nre/init.nim
@@ -0,0 +1,36 @@
+import unittest
+include nre
+
+suite "Test NRE initialization":
+  test "correct initialization":  # compiling a valid pattern must succeed
+    check(re("[0-9]+") != nil)  # plain pattern
+    check(re("(?i)[0-9]+") != nil)  # pattern starting with a (?i) group
+
+  test "options":
+    check(extractOptions("(*NEVER_UTF)") ==
+          ("", pcre.NEVER_UTF, true))
+    check(extractOptions("(*UTF8)(*ANCHORED)(*UCP)z") ==
+          ("(*UTF8)(*UCP)z", pcre.ANCHORED, true))
+    check(extractOptions("(*ANCHORED)(*UTF8)(*JAVASCRIPT_COMPAT)z") ==
+          ("(*UTF8)z", pcre.ANCHORED or pcre.JAVASCRIPT_COMPAT, true))
+
+    check(extractOptions("(*NO_STUDY)(") == ("(", 0, false))
+
+    check(extractOptions("(*LIMIT_MATCH=6)(*ANCHORED)z") ==
+          ("(*LIMIT_MATCH=6)z", pcre.ANCHORED, true))
+
+  test "incorrect options":
+    for s in ["CR", "(CR", "(*CR", "(*abc)", "(*abc)CR",
+              "(?i)",
+              "(*LIMIT_MATCH=5", "(*NO_AUTO_POSSESS=5)"]:
+      let ss = s & "(*NEVER_UTF)"
+      check(extractOptions(ss) == (ss, 0, true))
+
+  test "invalid regex":
+    expect(SyntaxError): discard re("[0-9")
+    try:
+      discard re("[0-9")
+    except SyntaxError:
+      let ex = SyntaxError(getCurrentException())
+      check(ex.pos == 4)
+      check(ex.pattern == "[0-9")
diff --git a/tests/stdlib/nre/match.nim b/tests/stdlib/nre/match.nim
new file mode 100644
index 000000000..38ee5214b
--- /dev/null
+++ b/tests/stdlib/nre/match.nim
@@ -0,0 +1,18 @@
+include nre, unittest, optional_nonstrict
+
+suite "match":
+  test "upper bound must be inclusive":
+    check("abc".match(re"abc", endpos = -1) == none(RegexMatch))
+    check("abc".match(re"abc", endpos = 1) == none(RegexMatch))
+    check("abc".match(re"abc", endpos = 2) != none(RegexMatch))
+
+  test "match examples":
+    check("abc".match(re"(\w)").captures[0] == "a")
+    check("abc".match(re"(?<letter>\w)").captures["letter"] == "a")
+    check("abc".match(re"(\w)\w").captures[-1] == "ab")
+    check("abc".match(re"(\w)").captureBounds[0].get == 0 .. 0)
+    check("abc".match(re"").captureBounds[-1].get == 0 .. -1)
+    check("abc".match(re"abc").captureBounds[-1].get == 0 .. 2)
+
+  test "match test cases":
+    check("123".match(re"").matchBounds == 0 .. -1)
diff --git a/tests/stdlib/nre/misc.nim b/tests/stdlib/nre/misc.nim
new file mode 100644
index 000000000..f4a88b639
--- /dev/null
+++ b/tests/stdlib/nre/misc.nim
@@ -0,0 +1,16 @@
+import unittest, nre, strutils, optional_nonstrict
+
+suite "Misc tests":
+  test "unicode":
+    check("".find(re"(*UTF8)").match == "")
+    check("перевірка".replace(re"(*U)\w", "") == "")
+
+  test "empty or non-empty match":  # `|.` lists the empty branch first, `.|` last
+    check("abc".findAll(re"|.").join(":") == ":a::b::c:")
+    check("abc".findAll(re".|").join(":") == "a:b:c:")
+
+    check("abc".replace(re"|.", "x") == "xxxxxxx")
+    check("abc".replace(re".|", "x") == "xxxx")
+
+    check("abc".split(re"|.").join(":") == ":::::")
+    check("abc".split(re".|").join(":") == ":::")
diff --git a/tests/stdlib/nre/optional_nonstrict.nim b/tests/stdlib/nre/optional_nonstrict.nim
new file mode 100644
index 000000000..d13f4fab7
--- /dev/null
+++ b/tests/stdlib/nre/optional_nonstrict.nim
@@ -0,0 +1,3 @@
+import options
+converter option2val*[T](val: Option[T]): T =
+  return val.get()
diff --git a/tests/stdlib/nre/replace.nim b/tests/stdlib/nre/replace.nim
new file mode 100644
index 000000000..516fd4328
--- /dev/null
+++ b/tests/stdlib/nre/replace.nim
@@ -0,0 +1,20 @@
+include nre
+import unittest
+
+suite "replace":
+  test "replace with 0-length strings":
+    check("".replace(re"1", proc (v: RegexMatch): string = "1") == "")
+    check(" ".replace(re"", proc (v: RegexMatch): string = "1") == "1 1")
+    check("".replace(re"", proc (v: RegexMatch): string = "1") == "1")
+
+  test "regular replace":
+    check("123".replace(re"\d", "foo") == "foofoofoo")
+    check("123".replace(re"(\d)", "$1$1") == "112233")
+    check("123".replace(re"(\d)(\d)", "$1$2") == "123")
+    check("123".replace(re"(\d)(\d)", "$#$#") == "123")
+    check("123".replace(re"(?<foo>\d)(\d)", "$foo$#$#") == "1123")
+    check("123".replace(re"(?<foo>\d)(\d)", "${foo}$#$#") == "1123")
+
+  test "replacing missing captures should throw instead of segfaulting":
+    expect ValueError: discard "ab".replace(re"(a)|(b)", "$1$2")
+    expect ValueError: discard "b".replace(re"(a)?(b)", "$1$2")
diff --git a/tests/stdlib/nre/split.nim b/tests/stdlib/nre/split.nim
new file mode 100644
index 000000000..8064e40b7
--- /dev/null
+++ b/tests/stdlib/nre/split.nim
@@ -0,0 +1,52 @@
+import unittest, strutils
+include nre
+
+suite "string splitting":
+  test "splitting strings":
+    check("1 2 3 4 5 6 ".split(re" ") == @["1", "2", "3", "4", "5", "6", ""])
+    check("1  2  ".split(re(" ")) == @["1", "", "2", "", ""])
+    check("1 2".split(re(" ")) == @["1", "2"])
+    check("foo".split(re("foo")) == @["", ""])
+    check("".split(re"foo") == @[""])
+
+  test "captured patterns":
+    check("12".split(re"(\d)") == @["", "1", "", "2", ""])
+
+  test "maxsplit":
+    check("123".split(re"", maxsplit = 2) == @["1", "23"])
+    check("123".split(re"", maxsplit = 1) == @["123"])
+    check("123".split(re"", maxsplit = -1) == @["1", "2", "3"])
+
+  test "split with 0-length match":
+    check("12345".split(re("")) == @["1", "2", "3", "4", "5"])
+    check("".split(re"") == newSeq[string]())
+    check("word word".split(re"\b") == @["word", " ", "word"])
+    check("word\r\lword".split(re"(*ANYCRLF)(?m)$") == @["word", "\r\lword"])
+    check("слово слово".split(re"(*U)(\b)") == @["", "слово", "", " ", "", "слово", ""])
+
+  test "perl split tests":
+    check("forty-two"                    .split(re"")      .join(",") == "f,o,r,t,y,-,t,w,o")
+    check("forty-two"                    .split(re"", 3)   .join(",") == "f,o,rty-two")
+    check("split this string"            .split(re" ")     .join(",") == "split,this,string")
+    check("split this string"            .split(re" ", 2)  .join(",") == "split,this string")
+    check("try$this$string"              .split(re"\$")    .join(",") == "try,this,string")
+    check("try$this$string"              .split(re"\$", 2) .join(",") == "try,this$string")
+    check("comma, separated, values"     .split(re", ")    .join("|") == "comma|separated|values")
+    check("comma, separated, values"     .split(re", ", 2) .join("|") == "comma|separated, values")
+    check("Perl6::Camelia::Test"         .split(re"::")    .join(",") == "Perl6,Camelia,Test")
+    check("Perl6::Camelia::Test"         .split(re"::", 2) .join(",") == "Perl6,Camelia::Test")
+    check("split,me,please"              .split(re",")     .join("|") == "split|me|please")
+    check("split,me,please"              .split(re",", 2)  .join("|") == "split|me,please")
+    check("Hello World    Goodbye   Mars".split(re"\s+")   .join(",") == "Hello,World,Goodbye,Mars")
+    check("Hello World    Goodbye   Mars".split(re"\s+", 3).join(",") == "Hello,World,Goodbye   Mars")
+    check("Hello test"                   .split(re"(\s+)") .join(",") == "Hello, ,test")
+    check("this will be split"           .split(re" ")     .join(",") == "this,will,be,split")
+    check("this will be split"           .split(re" ", 3)  .join(",") == "this,will,be split")
+    check("a.b"                          .split(re"\.")    .join(",") == "a,b")
+    check(""                             .split(re"")      .len       == 0)
+    check(":"                            .split(re"")      .len       == 1)
+
+  test "start position":
+    check("abc".split(re"", start = 1) == @["b", "c"])
+    check("abc".split(re"", start = 2) == @["c"])
+    check("abc".split(re"", start = 3) == newSeq[string]())
diff --git a/tests/stdlib/tnre.nim b/tests/stdlib/tnre.nim
new file mode 100644
index 000000000..85792b81e
--- /dev/null
+++ b/tests/stdlib/tnre.nim
@@ -0,0 +1,9 @@
+import nre
+import nre.init
+import nre.captures
+import nre.find
+import nre.split
+import nre.match
+import nre.replace
+import nre.escape
+import nre.misc
author	Andreas Rumpf <rumpf_a@web.de>	2015-06-12 00:13:19 +0200
committer	Andreas Rumpf <rumpf_a@web.de>	2015-06-12 00:13:19 +0200
commit	d31fe7666a949afced44e538d9a2dda3e29d4a28 (patch)
tree	8d2ceac7aacbaaf842a1c3c192be5763610aef78 /tests
parent	e7bb37d12c11bae7ace5b876da2a50f6ffa5f8de (diff)
parent	31514550d38132cdd64be723a7a25819dbf31f97 (diff)
download	Nim-d31fe7666a949afced44e538d9a2dda3e29d4a28.tar.gz