diff options
Diffstat (limited to 'lib')
-rwxr-xr-x | lib/impure/re.nim | 144 | ||||
-rwxr-xr-x | lib/oldwrappers/tcl.nim | 2 | ||||
-rw-r--r-- | lib/pure/algorithm.nim | 2 | ||||
-rwxr-xr-x | lib/pure/pegs.nim | 24 | ||||
-rwxr-xr-x | lib/pure/strutils.nim | 4 | ||||
-rwxr-xr-x | lib/system.nim | 9 | ||||
-rwxr-xr-x | lib/system/inclrtl.nim | 2 | ||||
-rwxr-xr-x | lib/wrappers/claro.nim | 2 | ||||
-rwxr-xr-x | lib/wrappers/pcre.nim | 4 | ||||
-rwxr-xr-x | lib/wrappers/tcl.nim | 2 | ||||
-rwxr-xr-x | lib/wrappers/tre.nim | 2 |
11 files changed, 143 insertions, 54 deletions
diff --git a/lib/impure/re.nim b/lib/impure/re.nim index b74116395..36adf5d1f 100755 --- a/lib/impure/re.nim +++ b/lib/impure/re.nim @@ -1,7 +1,7 @@ # # # Nimrod's Runtime Library -# (c) Copyright 2010 Andreas Rumpf +# (c) Copyright 2011 Andreas Rumpf # # See the file "copying.txt", included in this # distribution, for details about the copyright. @@ -32,46 +32,57 @@ type reIgnoreCase = 0, ## do caseless matching reMultiLine = 1, ## ``^`` and ``$`` match newlines within data reDotAll = 2, ## ``.`` matches anything including NL - reExtended = 3 ## ignore whitespace and ``#`` comments + reExtended = 3, ## ignore whitespace and ``#`` comments + reStudy = 4 ## study the expression (may be omitted if the + ## expression will be used only once) TRegExDesc {.pure, final.} = object h: PPcre + e: ptr TExtra TRegEx* = ref TRegExDesc ## a compiled regular expression EInvalidRegEx* = object of EInvalidValue ## is raised if the pattern is no valid regular expression. +proc raiseInvalidRegex(msg: string) {.noinline, noreturn.} = + var e: ref EInvalidRegEx + new(e) + e.msg = msg + raise e + proc rawCompile(pattern: string, flags: cint): PPcre = var msg: CString offset: cint - com = pcre.Compile(pattern, flags, addr(msg), addr(offset), nil) - if com == nil: - var e: ref EInvalidRegEx - new(e) - e.msg = $msg & "\n" & pattern & "\n" & repeatChar(offset) & "^\n" - raise e - return com + result = pcre.Compile(pattern, flags, addr(msg), addr(offset), nil) + if result == nil: + raiseInvalidRegEx($msg & "\n" & pattern & "\n" & repeatChar(offset) & "^\n") proc finalizeRegEx(x: TRegEx) = # XXX This is a hack, but PCRE does not export its "free" function properly. # Sigh. The hack relies on PCRE's implementation (see ``pcre_get.c``). # Fortunately the implementation is unlikely to change. pcre.free_substring(cast[cstring](x.h)) + if not isNil(x.e): + pcre.free_substring(cast[cstring](x.e)) -proc re*(s: string, flags = {reExtended}): TRegEx = +proc re*(s: string, flags = {reExtended, reStudy}): TRegEx = ## Constructor of regular expressions. Note that Nimrod's ## extended raw string literals support this syntax ``re"[abc]"`` as ## a short form for ``re(r"[abc]")``. new(result, finalizeRegEx) - result.h = rawCompile(s, cast[cint](flags)) - + result.h = rawCompile(s, cast[cint](flags - {reStudy})) + if reStudy in flags: + var msg: cstring + result.e = pcre.study(result.h, 0, msg) + if not isNil(msg): raiseInvalidRegex($msg) + proc matchOrFind(s: string, pattern: TRegEx, matches: var openarray[string], start, flags: cint): cint = var rawMatches: array[0..maxSubpatterns * 3 - 1, cint] - res = pcre.Exec(pattern.h, nil, s, len(s), start, flags, + res = pcre.Exec(pattern.h, pattern.e, s, len(s), start, flags, cast[ptr cint](addr(rawMatches)), maxSubpatterns * 3) if res < 0'i32: return res for i in 1..int(res)-1: @@ -83,13 +94,13 @@ proc matchOrFind(s: string, pattern: TRegEx, matches: var openarray[string], proc findBounds*(s: string, pattern: TRegEx, matches: var openarray[string], start = 0): tuple[first, last: int] = - ## returns the starting position and end position of ``pattern`` in ``s`` + ## returns the starting position and end position of `pattern` in `s` ## and the captured - ## substrings in the array ``matches``. If it does not match, nothing - ## is written into ``matches`` and (-1,0) is returned. + ## substrings in the array `matches`. If it does not match, nothing + ## is written into `matches` and ``(-1,0)`` is returned. var rawMatches: array[0..maxSubpatterns * 3 - 1, cint] - res = pcre.Exec(pattern.h, nil, s, len(s), start, 0'i32, + res = pcre.Exec(pattern.h, pattern.e, s, len(s), start, 0'i32, cast[ptr cint](addr(rawMatches)), maxSubpatterns * 3) if res < 0'i32: return (-1, 0) for i in 1..int(res)-1: @@ -98,10 +109,40 @@ proc findBounds*(s: string, pattern: TRegEx, matches: var openarray[string], if a >= 0'i32: matches[i-1] = copy(s, int(a), int(b)-1) else: matches[i-1] = "" return (rawMatches[0].int, rawMatches[1].int - 1) + +proc findBounds*(s: string, pattern: TRegEx, + matches: var openarray[tuple[first, last: int]], + start = 0): tuple[first, last: int] = + ## returns the starting position and end position of ``pattern`` in ``s`` + ## and the captured substrings in the array `matches`. + ## If it does not match, nothing is written into `matches` and + ## ``(-1,0)`` is returned. + var + rawMatches: array[0..maxSubpatterns * 3 - 1, cint] + res = pcre.Exec(pattern.h, pattern.e, s, len(s), start, 0'i32, + cast[ptr cint](addr(rawMatches)), maxSubpatterns * 3) + if res < 0'i32: return (-1, 0) + for i in 1..int(res)-1: + var a = rawMatches[i * 2] + var b = rawMatches[i * 2 + 1] + if a >= 0'i32: matches[i-1] = (int(a), int(b)-1) + else: matches[i-1] = (-1,0) + return (rawMatches[0].int, rawMatches[1].int - 1) +proc findBounds*(s: string, pattern: TRegEx, + start = 0): tuple[first, last: int] = + ## returns the starting position of `pattern` in `s`. If it does not + ## match, ``(-1,0)`` is returned. + var + rawMatches: array[0..3 - 1, cint] + res = pcre.Exec(pattern.h, nil, s, len(s), start, 0'i32, + cast[ptr cint](addr(rawMatches)), 3) + if res < 0'i32: return (int(res), 0) + return (int(rawMatches[0]), int(rawMatches[1]-1)) + proc matchOrFind(s: string, pattern: TRegEx, start, flags: cint): cint = var rawMatches: array [0..maxSubpatterns * 3 - 1, cint] - result = pcre.Exec(pattern.h, nil, s, len(s), start, flags, + result = pcre.Exec(pattern.h, pattern.e, s, len(s), start, flags, cast[ptr cint](addr(rawMatches)), maxSubpatterns * 3) if result >= 0'i32: result = rawMatches[1] - rawMatches[0] @@ -139,7 +180,7 @@ proc find*(s: string, pattern: TRegEx, matches: var openarray[string], ## is written into ``matches`` and -1 is returned. var rawMatches: array[0..maxSubpatterns * 3 - 1, cint] - res = pcre.Exec(pattern.h, nil, s, len(s), start, 0'i32, + res = pcre.Exec(pattern.h, pattern.e, s, len(s), start, 0'i32, cast[ptr cint](addr(rawMatches)), maxSubpatterns * 3) if res < 0'i32: return res for i in 1..int(res)-1: @@ -219,31 +260,64 @@ proc endsWith*(s: string, suffix: TRegEx): bool = for i in 0 .. s.len-1: if matchLen(s, suffix, i) == s.len - i: return true -proc replace*(s: string, sub: TRegEx, by: string): string = +proc replace*(s: string, sub: TRegEx, by = ""): string = + ## Replaces `sub` in `s` by the string `by`. Captures cannot be + ## accessed in `by`. Examples: + ## + ## .. code-block:: nimrod + ## "var1=key; var2=key2".replace(re"(\w+)'='(\w+)") + ## + ## Results in: + ## + ## .. code-block:: nimrod + ## + ## "; " + result = "" + var prev = 0 + while true: + var match = findBounds(s, sub, prev) + if match.first < 0: break + add(result, copy(s, prev, match.first-1)) + add(result, by) + prev = match.last + 1 + add(result, copy(s, prev)) + +proc replacef*(s: string, sub: TRegEx, by: string): string = ## Replaces `sub` in `s` by the string `by`. Captures can be accessed in `by` ## with the notation ``$i`` and ``$#`` (see strutils.`%`). Examples: ## ## .. code-block:: nimrod - ## "var1=key; var2=key2".replace(re"(\w+)'='(\w+)", "$1<-$2$2") + ## "var1=key; var2=key2".replace(re"(\w+)'='(\w+)", "$1<-$2$2") ## ## Results in: ## ## .. code-block:: nimrod ## - ## "var1<-keykey; val2<-key2key2" + ## "var1<-keykey; val2<-key2key2" result = "" - var i = 0 var caps: array[0..maxSubpatterns-1, string] - while i < s.len: - var x = matchLen(s, sub, caps, i) - if x <= 0: - add(result, s[i]) - inc(i) - else: - addf(result, by, caps) - inc(i, x) - # copy the rest: - add(result, copy(s, i)) + var prev = 0 + while true: + var match = findBounds(s, sub, caps, prev) + if match.first < 0: break + add(result, copy(s, prev, match.first-1)) + addf(result, by, caps) + prev = match.last + 1 + add(result, copy(s, prev)) + when false: + result = "" + var i = 0 + var caps: array[0..maxSubpatterns-1, string] + while i < s.len: + var x = matchLen(s, sub, caps, i) + if x <= 0: + add(result, s[i]) + inc(i) + else: + addf(result, by, caps) + inc(i, x) + # copy the rest: + add(result, copy(s, i)) proc parallelReplace*(s: string, subs: openArray[ tuple[pattern: TRegEx, repl: string]]): string = @@ -376,8 +450,10 @@ when isMainModule: assert false assert "var1=key; var2=key2".endsWith(re"\w+=\w+") - assert("var1=key; var2=key2".replace(re"(\w+)=(\w+)", "$1<-$2$2") == + assert("var1=key; var2=key2".replacef(re"(\w+)=(\w+)", "$1<-$2$2") == "var1<-keykey; var2<-key2key2") + assert("var1=key; var2=key2".replace(re"(\w+)=(\w+)", "$1<-$2$2") == + "$1<-$2$2; $1<-$2$2") for word in split("00232this02939is39an22example111", re"\d+"): writeln(stdout, word) diff --git a/lib/oldwrappers/tcl.nim b/lib/oldwrappers/tcl.nim index 813714ecd..5e16b798e 100755 --- a/lib/oldwrappers/tcl.nim +++ b/lib/oldwrappers/tcl.nim @@ -37,7 +37,7 @@ when defined(WIN32): const dllName = "tcl(85|84|83|82|81|80).dll" elif defined(macosx): - const dllName = "libtcl(8.5|8.4|8.3|8.2|8.1).dynlib" + const dllName = "libtcl(8.5|8.4|8.3|8.2|8.1).dylib" else: const dllName = "libtcl(8.5|8.4|8.3|8.2|8.1).so.(1|0)" diff --git a/lib/pure/algorithm.nim b/lib/pure/algorithm.nim index c9e5b0e14..517819e1c 100644 --- a/lib/pure/algorithm.nim +++ b/lib/pure/algorithm.nim @@ -1,7 +1,7 @@ # # # Nimrod's Runtime Library -# (c) Copyright 2010 Andreas Rumpf +# (c) Copyright 2011 Andreas Rumpf # # See the file "copying.txt", included in this # distribution, for details about the copyright. diff --git a/lib/pure/pegs.nim b/lib/pure/pegs.nim index de968bff4..334f5dcd3 100755 --- a/lib/pure/pegs.nim +++ b/lib/pure/pegs.nim @@ -874,7 +874,7 @@ proc endsWith*(s: string, suffix: TPeg, start = 0): bool {. for i in start .. s.len-1: if matchLen(s, suffix, i) == s.len - i: return true -proc replace*(s: string, sub: TPeg, by: string): string {. +proc replacef*(s: string, sub: TPeg, by: string): string {. nosideEffect, rtl, extern: "npegs$1".} = ## Replaces `sub` in `s` by the string `by`. Captures can be accessed in `by` ## with the notation ``$i`` and ``$#`` (see strutils.`%`). Examples: @@ -898,7 +898,23 @@ proc replace*(s: string, sub: TPeg, by: string): string {. else: addf(result, by, caps) inc(i, x) - # copy the rest: + add(result, copy(s, i)) + +proc replace*(s: string, sub: TPeg, by = ""): string {. + nosideEffect, rtl, extern: "npegs$1".} = + ## Replaces `sub` in `s` by the string `by`. Captures cannot be accessed + ## in `by`. + result = "" + var i = 0 + var caps: array[0..maxSubpatterns-1, string] + while i < s.len: + var x = matchLen(s, sub, caps, i) + if x <= 0: + add(result, s[i]) + inc(i) + else: + addf(result, by, caps) + inc(i, x) add(result, copy(s, i)) proc parallelReplace*(s: string, subs: openArray[ @@ -1691,7 +1707,7 @@ when isMainModule: """ assert($g2 == "((A B) / (C D))") assert match("cccccdddddd", g2) - assert("var1=key; var2=key2".replace(peg"{\ident}'='{\ident}", "$1<-$2$2") == + assert("var1=key; var2=key2".replacef(peg"{\ident}'='{\ident}", "$1<-$2$2") == "var1<-keykey; var2<-key2key2") assert "var1=key; var2=key2".endsWith(peg"{\ident}'='{\ident}") @@ -1722,7 +1738,7 @@ when isMainModule: assert match("EINE ÜBERSICHT UND AUSSERDEM", peg"(\upper \white*)+") assert(not match("456678", peg"(\letter)+")) - assert("var1 = key; var2 = key2".replace( + assert("var1 = key; var2 = key2".replacef( peg"\skip(\s*) {\ident}'='{\ident}", "$1<-$2$2") == "var1<-keykey;var2<-key2key2") diff --git a/lib/pure/strutils.nim b/lib/pure/strutils.nim index 0673a9588..435f522eb 100755 --- a/lib/pure/strutils.nim +++ b/lib/pure/strutils.nim @@ -687,7 +687,7 @@ proc contains*(s: string, chars: set[char]): bool {.noSideEffect.} = ## Same as ``find(s, chars) >= 0``. return find(s, chars) >= 0 -proc replace*(s, sub, by: string): string {.noSideEffect, +proc replace*(s, sub: string, by = ""): string {.noSideEffect, rtl, extern: "nsuReplaceStr".} = ## Replaces `sub` in `s` by the string `by`. var a: TSkipTable @@ -800,7 +800,7 @@ proc escape*(s: string, prefix = "\"", suffix = "\""): string {.noSideEffect, ## by ``\xHH`` where ``HH`` is its hexadecimal value. ## The procedure has been designed so that its output is usable for many ## different common syntaxes. The resulting string is prefixed with - ## ``prefix`` and suffixed with ``suffix``. Both may be empty strings. + ## `prefix` and suffixed with `suffix`. Both may be empty strings. result = prefix for c in items(s): case c diff --git a/lib/system.nim b/lib/system.nim index aeca9b683..e1b6aeb4f 100755 --- a/lib/system.nim +++ b/lib/system.nim @@ -65,11 +65,11 @@ proc `not` *(x: bool): bool {.magic: "Not", noSideEffect.} proc `and`*(x, y: bool): bool {.magic: "And", noSideEffect.} ## Boolean ``and``; returns true iff ``x == y == true``. - ## Evaluation is short-circuited: this means that if ``x`` is false, + ## Evaluation is lazy: if ``x`` is false, ## ``y`` will not even be evaluated. proc `or`*(x, y: bool): bool {.magic: "Or", noSideEffect.} ## Boolean ``or``; returns true iff ``not (not x and not y)``. - ## Evaluation is short-circuited: this means that if ``x`` is true, + ## Evaluation is lazy: if ``x`` is true, ## ``y`` will not even be evaluated. proc `xor`*(x, y: bool): bool {.magic: "Xor", noSideEffect.} ## Boolean `exclusive or`; returns true iff ``x != y``. @@ -623,7 +623,7 @@ template `not_in` * (x, y: expr): expr = not contains(y, x) proc `is` *[T, S](x: T, y: S): bool {.magic: "Is", noSideEffect.} template `is_not` *(x, y: expr): expr = not (x is y) -proc cmp*[T, S: typeDesc](x: T, y: S): int {.procvar.} = +proc cmp*[T](x, y: T): int {.procvar.} = ## Generic compare proc. Returns a value < 0 iff x < y, a value > 0 iff x > y ## and 0 iff x == y. This is useful for writing generic algorithms without ## performance loss. This generic implementation uses the `==` and `<` @@ -1034,9 +1034,6 @@ iterator countup*[S, T](a: S, b: T, step = 1): T {.inline.} = while res <= b: yield res inc(res, step) - # we cannot use ``for x in a..b: `` here, because that is not - # known in the System module - proc min*(x, y: int): int {.magic: "MinI", noSideEffect.} proc min*(x, y: int8): int8 {.magic: "MinI", noSideEffect.} diff --git a/lib/system/inclrtl.nim b/lib/system/inclrtl.nim index e4644b969..3898355c8 100755 --- a/lib/system/inclrtl.nim +++ b/lib/system/inclrtl.nim @@ -31,7 +31,7 @@ elif defined(useNimRtl): when hostOS == "windows": const nimrtl* = "nimrtl.dll" elif hostOS == "macosx": - const nimrtl* = "nimrtl.dynlib" + const nimrtl* = "nimrtl.dylib" else: const nimrtl* = "libnimrtl.so" {.pragma: rtl, importc: "nimrtl_$1", dynlib: nimrtl.} diff --git a/lib/wrappers/claro.nim b/lib/wrappers/claro.nim index feab2a216..fb06da818 100755 --- a/lib/wrappers/claro.nim +++ b/lib/wrappers/claro.nim @@ -27,7 +27,7 @@ when defined(windows): clarodll = "claro.dll" elif defined(macosx): const - clarodll = "libclaro.dynlib" + clarodll = "libclaro.dylib" else: const clarodll = "libclaro.so" diff --git a/lib/wrappers/pcre.nim b/lib/wrappers/pcre.nim index 7e75035da..ef397e966 100755 --- a/lib/wrappers/pcre.nim +++ b/lib/wrappers/pcre.nim @@ -39,7 +39,7 @@ when not defined(pcreDll): when hostOS == "windows": const pcreDll = "pcre3.dll" elif hostOS == "macosx": - const pcreDll = "libpcre.dynlib" + const pcreDll = "libpcre(.3|).dylib" else: const pcreDll = "libpcre.so(.3|)" @@ -275,7 +275,7 @@ proc maketables*(): ptr char{.cdecl, importc: "pcre_maketables", dynlib: pcredll.} proc refcount*(a2: ptr TPcre, a3: cint): cint{.cdecl, importc: "pcre_refcount", dynlib: pcredll.} -proc study*(a2: ptr TPcre, a3: cint, a4: cstringArray): ptr Textra{.cdecl, +proc study*(a2: ptr TPcre, a3: cint, a4: var cstring): ptr Textra{.cdecl, importc: "pcre_study", dynlib: pcredll.} proc version*(): cstring{.cdecl, importc: "pcre_version", dynlib: pcredll.} diff --git a/lib/wrappers/tcl.nim b/lib/wrappers/tcl.nim index 0134abad7..6f0368574 100755 --- a/lib/wrappers/tcl.nim +++ b/lib/wrappers/tcl.nim @@ -39,7 +39,7 @@ when defined(WIN32): dllName = "tcl(85|84|83|82|81|80).dll" elif defined(macosx): const - dllName = "libtcl(8.5|8.4|8.3|8.2|8.1).dynlib" + dllName = "libtcl(8.5|8.4|8.3|8.2|8.1).dylib" else: const dllName = "libtcl(8.5|8.4|8.3|8.2|8.1).so.(1|0)" diff --git a/lib/wrappers/tre.nim b/lib/wrappers/tre.nim index f83d402c0..92cd16333 100755 --- a/lib/wrappers/tre.nim +++ b/lib/wrappers/tre.nim @@ -10,7 +10,7 @@ when not defined(treDll): when hostOS == "windows": const treDll = "tre.dll" elif hostOS == "macosx": - const treDll = "libtre.dynlib" + const treDll = "libtre.dylib" else: const treDll = "libtre.so(.5|)" |