summary refs log tree commit diff stats
path: root/src/nre.nim
diff options
context:
space:
mode:
author Flaviu Tamas <tamasflaviu@gmail.com> 2015-01-10 16:20:25 -0500
committer Flaviu Tamas <tamasflaviu@gmail.com> 2015-01-10 16:20:25 -0500
commit 721ea11628a3717b07c1867096e817f06068a24e (patch)
tree d174caf84d0a209149404c7fe6c590bb6e108f6d /src/nre.nim
parent 2db1a54710db4684f5f816ea4541f0998c11de9e (diff)
download Nim-721ea11628a3717b07c1867096e817f06068a24e.tar.gz
Implement captures
Diffstat (limited to 'src/nre.nim')
-rw-r--r-- src/nre.nim 142
1 files changed, 104 insertions, 38 deletions
diff --git a/src/nre.nim b/src/nre.nim
index 5839cab2a..cb15fb260 100644
--- a/src/nre.nim
+++ b/src/nre.nim
@@ -70,11 +70,18 @@ type
     pcreObj: ptr pcre.Pcre  # not nil
     pcreExtra: ptr pcre.ExtraData  ## nil
 
-  RegexMatch* = object
+    captureNameToId: Table[string, int]
+
+  RegexMatch* = ref object
     pattern: Regex
-    matchBounds: seq[Slice[cint]] ## First item is the bounds of the match
+    inputStr: string
+    pcreMatchBounds: seq[Slice[cint]] ## First item is the bounds of the match
                                   ## Other items are the captures
                                   ## `a` is inclusive start, `b` is exclusive end
+    matchCache: seq[string]
+
+  Captures* = distinct RegexMatch  ## view whose `[]` yields capture strings
+  CaptureBounds* = distinct RegexMatch  ## view whose `[]` yields capture bounds
 
   SyntaxError* = ref object of Exception
     pos*: int  ## the location of the syntax error in bytes
@@ -82,6 +89,72 @@ type
 
   StudyError* = ref object of Exception
 
+proc getinfo[T](self: Regex, opt: cint): T =
+  ## Reads item `opt` of the compiled pattern via `pcre.fullinfo` into `result`.
+  let status = pcre.fullinfo(self.pcreObj, self.pcreExtra, opt, addr result)
+  if status < 0:
+    # XXX: reword so the message doesn't expose implementation details
+    raise newException(FieldError, "Invalid getinfo for $1, errno $2" % [$opt, $status])
+
+proc captureCount*(self: Regex): int =
+  ## Returns the maximum number of captures the pattern can produce, not the
+  ## number of captures that actually participated in a particular match.
+  ## PCRE writes this item as a C `int`, so it is read as `cint` and widened.
+  return int(getinfo[cint](self, pcre.INFO_CAPTURECOUNT))
+
+# Capture accessors: typed views over one RegexMatch {{{
+proc captureBounds*(self: RegexMatch): CaptureBounds = CaptureBounds(self)
+# `captureBounds` indexes to index ranges, `captures` indexes to substrings.
+proc captures*(self: RegexMatch): Captures = Captures(self)
+
+proc `[]`*(self: CaptureBounds, i: int): Option[Slice[int]] =
+  ## Looks up where the `i`th capture sits in the input string.
+  ## `i == -1` addresses the whole match; capture `i` is stored at `i + 1`.
+  ## Returns None if `i` is an optional capture that did not match.
+  ## Behavior is undefined if `i` is out of bounds.
+  let m = RegexMatch(self)
+  let raw = m.pcreMatchBounds[i + 1]
+  # a start offset of -1 marks a capture that did not participate
+  if raw.a == -1:
+    return None[Slice[int]]()
+  return Some(int(raw.a) .. int(raw.b))
+
+proc `[]`*(self: Captures, i: int): string =
+  ## Returns the text of the `i`th capture, or nil if that capture failed.
+  ## `i == -1` selects the whole match.
+  ## Behavior is undefined if `i` is out of bounds.
+  ## Substrings are cut from the input on first access and then cached.
+  let m = RegexMatch(self)
+  let found = m.captureBounds[i]
+  let slot = i + 1  # slot 0 holds the whole match
+  if found:
+    let span = found.get
+    if m.matchCache == nil:
+      # one slot per capture, plus one for the entire match
+      m.matchCache = newSeq[string](m.pattern.captureCount + 1)
+    if m.matchCache[slot] == nil:
+      m.matchCache[slot] = m.inputStr[span.a .. span.b-1]
+    result = m.matchCache[slot]
+  else:
+    result = nil
+
+proc match*(self: RegexMatch): string =
+  self.captures[-1]  # capture index -1 addresses the whole match
+
+proc matchBounds*(self: RegexMatch): Slice[int] =
+  self.captureBounds[-1].get  # bounds of the whole match (capture index -1)
+
+proc `[]`*(self: CaptureBounds, name: string): Option[Slice[int]] =
+  ## Bounds of the capture called `name`; KeyError if no such named capture.
+  let id = RegexMatch(self).pattern.captureNameToId.fget(name)
+  return self[id]
+
+proc `[]`*(self: Captures, name: string): string =
+  ## Text of the capture called `name`; KeyError if no such named capture.
+  let id = RegexMatch(self).pattern.captureNameToId.fget(name)
+  return self[id]
+# }}}
+
 # Creation & Destruction {{{
 proc destroyRegex(self: Regex) =
   pcre.free_substring(cast[cstring](self.pcreObj))
@@ -89,6 +162,27 @@ proc destroyRegex(self: Regex) =
   if self.pcreExtra != nil:
     pcre.free_study(self.pcreExtra)
 
+type UncheckedArray {.unchecked.}[T] = array[0 .. 0, T]
+proc getNameToNumberTable(self: Regex): Table[string, int] =
+  let entryCount = getinfo[cint](self, pcre.INFO_NAMECOUNT)
+  let entrySize = getinfo[cint](self, pcre.INFO_NAMEENTRYSIZE)
+  let table = cast[ptr UncheckedArray[uint8]](
+                getinfo[int](self, pcre.INFO_NAMETABLE))
+  ## Maps each named capture to its zero-based capture index by walking the
+  ## PCRE name table (entries: 2-byte big-endian number, NUL-terminated name).
+  result = initTable[string, int]()
+  for i in 0 .. <entryCount:
+    let pos = i * entrySize
+    # Parenthesized because `-` binds tighter than `or`; otherwise a zero low
+    # byte (group 256, 512, ...) would turn the whole number into -1.
+    let num = ((int(table[pos]) shl 8) or int(table[pos + 1])) - 1
+    var name = ""
+    var idx = 2
+    while table[pos + idx] != 0:
+      name.add(char(table[pos + idx]))
+      idx += 1
+    result[name] = num
+
 proc initRegex*(pattern: string, options = "Sx"): Regex =
   new(result, destroyRegex)
   result.pattern = pattern
@@ -111,50 +205,22 @@ proc initRegex*(pattern: string, options = "Sx"): Regex =
     result.pcreExtra = pcre.study(result.pcreObj, 0x0, addr errorMsg)
     if result.pcreExtra == nil:
       raise StudyError(msg: $errorMsg)
-# }}}
-
-proc getinfo[T](self: Regex, opt: cint): T =
-  let retcode = pcre.fullinfo(self.pcreObj, self.pcreExtra, opt, addr result)
 
-  if retcode < 0:
-    # XXX Error message that doesn't expose implementation details
-    raise newException(FieldError, "Invalid getinfo for $1, errno $2" % [$opt, $retcode])
-
-proc getCaptureCount(self: Regex): int =
-  # get the maximum number of captures
-  return getinfo[int](self, pcre.INFO_CAPTURECOUNT)
-
-type UncheckedArray {.unchecked.}[T] = array[0 .. 0, T]
-proc getNameToNumberTable(self: Regex): Table[string, int] =
-  let entryCount = getinfo[cint](self, pcre.INFO_NAMECOUNT)
-  let entrySize = getinfo[cint](self, pcre.INFO_NAMEENTRYSIZE)
-  let table = cast[ptr UncheckedArray[uint8]](
-                getinfo[int](self, pcre.INFO_NAMETABLE))
-
-  result = initTable[string, int]()
-
-  for i in 0 .. <entryCount:
-    let pos = i * entrySize
-    let num = (int(table[pos]) shl 8) or int(table[pos + 1])
-    var name = ""
-
-    var idx = 2
-    while table[pos + idx] != 0:
-      name.add(char(table[pos + idx]))
-      idx += 1
-
-    result[name] = num
+  result.captureNameToId = result.getNameToNumberTable()
+# }}}
 
 proc exec*(self: Regex, str: string, start = 0): Option[RegexMatch] =
   var result: RegexMatch
+  new(result)
   result.pattern = self
+  result.inputStr = str
   # See PCRE man pages.
   # 2x capture count to make room for start-end pairs
   # 1x capture count as slack space for PCRE
-  let vecsize = (self.getCaptureCount() + 1) * 3
+  let vecsize = (self.captureCount() + 1) * 3
   # div 2 because each element is 2 cints long
-  result.matchBounds = newSeq[Slice[cint]](ceil(vecsize / 2).int)
-  result.matchBounds.setLen(vecsize div 3)
+  result.pcreMatchBounds = newSeq[Slice[cint]](ceil(vecsize / 2).int)
+  result.pcreMatchBounds.setLen(vecsize div 3)
 
   let execRet = pcre.exec(self.pcreObj,
                           self.pcreExtra,
@@ -162,7 +228,7 @@ proc exec*(self: Regex, str: string, start = 0): Option[RegexMatch] =
                           cint(str.len),
                           cint(start),
                           cint(0),
-                          cast[ptr cint](addr result.matchBounds[0]), cint(vecsize))
+                          cast[ptr cint](addr result.pcreMatchBounds[0]), cint(vecsize))
   if execRet >= 0:
     return Some(result)
   elif execRet == pcre.ERROR_NOMATCH: