diff options
author | Florent <florent@napalu.ch> | 2017-03-02 11:48:41 +0100 |
---|---|---|
committer | Andreas Rumpf <rumpf_a@web.de> | 2017-03-02 11:48:41 +0100 |
commit | 34a3d40d18ef4ff73c629e38738068fe509e3c6c (patch) | |
tree | 752560732786fe844d1d08f74c391c610491b0ae | |
parent | 32159ee827694985a5c1013a46761ad257ea7c75 (diff) | |
download | Nim-34a3d40d18ef4ff73c629e38738068fe509e3c6c.tar.gz |
fixes #5444 - nre.findIter keeps searching when no match is possible (#5453)
-rw-r--r-- | lib/impure/nre.nim | 12 | ||||
-rw-r--r-- | tests/stdlib/nre/find.nim | 19 |
2 files changed, 25 insertions, 6 deletions
diff --git a/lib/impure/nre.nim b/lib/impure/nre.nim index dda4b033f..4013182af 100644 --- a/lib/impure/nre.nim +++ b/lib/impure/nre.nim @@ -516,23 +516,23 @@ iterator findIter*(str: string, pattern: Regex, start = 0, endpos = int.high): R let unicode = uint32(getinfo[culong](pattern, pcre.INFO_OPTIONS) and pcre.UTF8) > 0u32 let strlen = if endpos == int.high: str.len else: endpos+1 - var offset = start var match: Option[RegexMatch] + var neverMatched = true + while true: var flags = 0 - if match.isSome and match.get.matchBounds.a > match.get.matchBounds.b: # 0-len match flags = pcre.NOTEMPTY_ATSTART - match = str.matchImpl(pattern, offset, endpos, flags) if match.isNone: # either the end of the input or the string - # cannot be split here - if offset >= strlen: + # cannot be split here - we also need to bail + # if we've never matched and we've already tried to... + if offset >= strlen or neverMatched: break if matchesCrLf and offset < (str.len - 1) and @@ -546,11 +546,11 @@ iterator findIter*(str: string, pattern: Regex, start = 0, endpos = int.high): R else: offset += 1 else: + neverMatched = false offset = match.get.matchBounds.b + 1 yield match.get - proc find*(str: string, pattern: Regex, start = 0, endpos = int.high): Option[RegexMatch] = ## Finds the given pattern in the string between the end and start ## positions. 
diff --git a/tests/stdlib/nre/find.nim b/tests/stdlib/nre/find.nim
index 94fdd0bc1..116d2111c 100644
--- a/tests/stdlib/nre/find.nim
+++ b/tests/stdlib/nre/find.nim
@@ -1,6 +1,7 @@
 import unittest, sequtils
 import nre except toSeq
 import optional_nonstrict
+import times, strutils
 
 suite "find":
   test "find text":
@@ -25,3 +26,21 @@ suite "find":
     check("word word".findAll(re"\b") == @["", "", "", ""])
     check("word\r\lword".findAll(re"(*ANYCRLF)(?m)$") == @["", ""])
     check("слово слово".findAll(re"(*U)\b") == @["", "", "", ""])
+
+  test "bail early":
+    ## we expect nothing to be found and we should be bailing out early which means that
+    ## the timing difference between searching in small and large data should be well
+    ## within a tolerance area
+    const tolerance = 0.0001
+    var smallData = repeat("url.sequence = \"http://whatever.com/jwhrejrhrjrhrjhrrjhrjrhrjrh\"", 10)
+    var largeData = repeat("url.sequence = \"http://whatever.com/jwhrejrhrjrhrjhrrjhrjrhrjrh\"", 1000000)
+    var start = cpuTime()
+    check(largeData.findAll(re"url.*? = '(.*?)'") == newSeq[string]())
+    var stop = cpuTime()
+    var elapsedLarge = stop - start
+    start = cpuTime()
+    check(smallData.findAll(re"url.*? = '(.*?)'") == newSeq[string]())
+    stop = cpuTime()
+    var elapsedSmall = stop - start
+    var difference = elapsedLarge - elapsedSmall
+    check(difference < tolerance)