diff options
author | Flaviu Tamas <tamasflaviu@gmail.com> | 2015-01-10 16:20:25 -0500 |
---|---|---|
committer | Flaviu Tamas <tamasflaviu@gmail.com> | 2015-01-10 16:20:25 -0500 |
commit | 721ea11628a3717b07c1867096e817f06068a24e (patch) | |
tree | d174caf84d0a209149404c7fe6c590bb6e108f6d /src/nre.nim | |
parent | 2db1a54710db4684f5f816ea4541f0998c11de9e (diff) | |
download | Nim-721ea11628a3717b07c1867096e817f06068a24e.tar.gz |
Implement captures
Diffstat (limited to 'src/nre.nim')
-rw-r--r-- | src/nre.nim | 142 |
1 files changed, 104 insertions, 38 deletions
diff --git a/src/nre.nim b/src/nre.nim index 5839cab2a..cb15fb260 100644 --- a/src/nre.nim +++ b/src/nre.nim @@ -70,11 +70,18 @@ type pcreObj: ptr pcre.Pcre # not nil pcreExtra: ptr pcre.ExtraData ## nil - RegexMatch* = object + captureNameToId: Table[string, int] + + RegexMatch* = ref object pattern: Regex - matchBounds: seq[Slice[cint]] ## First item is the bounds of the match + inputStr: string + pcreMatchBounds: seq[Slice[cint]] ## First item is the bounds of the match ## Other items are the captures ## `a` is inclusive start, `b` is exclusive end + matchCache: seq[string] + + Captures* = distinct RegexMatch + CaptureBounds* = distinct RegexMatch SyntaxError* = ref object of Exception pos*: int ## the location of the syntax error in bytes @@ -82,6 +89,72 @@ type StudyError* = ref object of Exception +proc getinfo[T](self: Regex, opt: cint): T = + let retcode = pcre.fullinfo(self.pcreObj, self.pcreExtra, opt, addr result) + + if retcode < 0: + # XXX Error message that doesn't expose implementation details + raise newException(FieldError, "Invalid getinfo for $1, errno $2" % [$opt, $retcode]) + +proc captureCount*(self: Regex): int = + ## get the maximum number of captures + ## + ## Does not return the number of captured captures + return getinfo[int](self, pcre.INFO_CAPTURECOUNT) + +# Capture accessors {{{ +proc captureBounds*(self: RegexMatch): CaptureBounds = return CaptureBounds(self) + +proc captures*(self: RegexMatch): Captures = return Captures(self) + +proc `[]`*(self: CaptureBounds, i: int): Option[Slice[int]] = + ## Gets the bounds of the `i`th capture. + ## Undefined behavior if `i` is out of bounds + ## If `i` is a failed optional capture, returns None + ## If `i == -1`, returns the whole match + let self = RegexMatch(self) + if self.pcreMatchBounds[i + 1].a != -1: + let bounds = self.pcreMatchBounds[i + 1] + return Some(int(bounds.a) .. int(bounds.b)) + else: + return None[Slice[int]]() + +proc `[]`*(self: Captures, i: int): string = + ## gets the `i`th capture + ## Undefined behavior if `i` is out of bounds + ## If `i` is a failed optional capture, returns nil + ## If `i == -1`, returns the whole match + let self = RegexMatch(self) + let bounds = self.captureBounds[i] + + if bounds: + let bounds = bounds.get + if self.matchCache == nil: + # capture count, plus the entire string + self.matchCache = newSeq[string](self.pattern.captureCount + 1) + if self.matchCache[i + 1] == nil: + self.matchCache[i + 1] = self.inputStr[bounds.a .. bounds.b-1] + return self.matchCache[i + 1] + else: + return nil + +proc match*(self: RegexMatch): string = + return self.captures[-1] + +proc matchBounds*(self: RegexMatch): Slice[int] = + return self.captureBounds[-1].get + +proc `[]`*(self: CaptureBounds, name: string): Option[Slice[int]] = + ## Will fail with KeyError if `name` is not a real named capture + let self = RegexMatch(self) + return self.captureBounds[self.pattern.captureNameToId.fget(name)] + +proc `[]`*(self: Captures, name: string): string = + ## Will fail with KeyError if `name` is not a real named capture + let self = RegexMatch(self) + return self.captures[self.pattern.captureNameToId.fget(name)] +# }}} + # Creation & Destruction {{{ proc destroyRegex(self: Regex) = pcre.free_substring(cast[cstring](self.pcreObj)) @@ -89,6 +162,27 @@ proc destroyRegex(self: Regex) = if self.pcreExtra != nil: pcre.free_study(self.pcreExtra) +type UncheckedArray {.unchecked.}[T] = array[0 .. 0, T] +proc getNameToNumberTable(self: Regex): Table[string, int] = + let entryCount = getinfo[cint](self, pcre.INFO_NAMECOUNT) + let entrySize = getinfo[cint](self, pcre.INFO_NAMEENTRYSIZE) + let table = cast[ptr UncheckedArray[uint8]]( + getinfo[int](self, pcre.INFO_NAMETABLE)) + + result = initTable[string, int]() + + for i in 0 .. <entryCount: + let pos = i * entrySize + let num = (int(table[pos]) shl 8) or int(table[pos + 1]) - 1 + var name = "" + + var idx = 2 + while table[pos + idx] != 0: + name.add(char(table[pos + idx])) + idx += 1 + + result[name] = num + proc initRegex*(pattern: string, options = "Sx"): Regex = new(result, destroyRegex) result.pattern = pattern @@ -111,50 +205,22 @@ proc initRegex*(pattern: string, options = "Sx"): Regex = result.pcreExtra = pcre.study(result.pcreObj, 0x0, addr errorMsg) if result.pcreExtra == nil: raise StudyError(msg: $errorMsg) -# }}} - -proc getinfo[T](self: Regex, opt: cint): T = - let retcode = pcre.fullinfo(self.pcreObj, self.pcreExtra, opt, addr result) - if retcode < 0: - # XXX Error message that doesn't expose implementation details - raise newException(FieldError, "Invalid getinfo for $1, errno $2" % [$opt, $retcode]) - -proc getCaptureCount(self: Regex): int = - # get the maximum number of captures - return getinfo[int](self, pcre.INFO_CAPTURECOUNT) - -type UncheckedArray {.unchecked.}[T] = array[0 .. 0, T] -proc getNameToNumberTable(self: Regex): Table[string, int] = - let entryCount = getinfo[cint](self, pcre.INFO_NAMECOUNT) - let entrySize = getinfo[cint](self, pcre.INFO_NAMEENTRYSIZE) - let table = cast[ptr UncheckedArray[uint8]]( - getinfo[int](self, pcre.INFO_NAMETABLE)) - - result = initTable[string, int]() - - for i in 0 .. <entryCount: - let pos = i * entrySize - let num = (int(table[pos]) shl 8) or int(table[pos + 1]) - var name = "" - - var idx = 2 - while table[pos + idx] != 0: - name.add(char(table[pos + idx])) - idx += 1 - - result[name] = num + result.captureNameToId = result.getNameToNumberTable() +# }}} proc exec*(self: Regex, str: string, start = 0): Option[RegexMatch] = var result: RegexMatch + new(result) result.pattern = self + result.inputStr = str # See PCRE man pages. # 2x capture count to make room for start-end pairs # 1x capture count as slack space for PCRE - let vecsize = (self.getCaptureCount() + 1) * 3 + let vecsize = (self.captureCount() + 1) * 3 # div 2 because each element is 2 cints long - result.matchBounds = newSeq[Slice[cint]](ceil(vecsize / 2).int) - result.matchBounds.setLen(vecsize div 3) + result.pcreMatchBounds = newSeq[Slice[cint]](ceil(vecsize / 2).int) + result.pcreMatchBounds.setLen(vecsize div 3) let execRet = pcre.exec(self.pcreObj, self.pcreExtra, @@ -162,7 +228,7 @@ proc exec*(self: Regex, str: string, start = 0): Option[RegexMatch] = cint(str.len), cint(start), cint(0), - cast[ptr cint](addr result.matchBounds[0]), cint(vecsize)) + cast[ptr cint](addr result.pcreMatchBounds[0]), cint(vecsize)) if execRet >= 0: return Some(result) elif execRet == pcre.ERROR_NOMATCH: |