summary refs log tree commit diff stats
path: root/lib/base/devel/nregex.nim
diff options
context:
space:
mode:
author Andreas Rumpf <rumpf_a@web.de> 2008-11-16 22:11:53 +0100
committer Andreas Rumpf <rumpf_a@web.de> 2008-11-16 22:11:53 +0100
commit 985113a88fe07fa18f0a34ceaa9e8c1c1e04a79b (patch)
tree 6a2e56078dd24d29d9031a784917ed94696d24a8 /lib/base/devel/nregex.nim
parent 8b2a9401a147bd0b26cd2976ae71a1022fbde8cc (diff)
download Nim-985113a88fe07fa18f0a34ceaa9e8c1c1e04a79b.tar.gz
version 0.7.0
Diffstat (limited to 'lib/base/devel/nregex.nim')
-rw-r--r-- lib/base/devel/nregex.nim 124
1 files changed, 124 insertions, 0 deletions
diff --git a/lib/base/devel/nregex.nim b/lib/base/devel/nregex.nim
new file mode 100644
index 000000000..77afb8421
--- /dev/null
+++ b/lib/base/devel/nregex.nim
@@ -0,0 +1,124 @@
+# new implementation of regular expressions
+
+type
+  # Node kinds of a compiled regular expression. Only regChar, regSet and
+  # regAny are assigned by the parsing procs in this file so far; the
+  # remaining kinds presumably mean what their names suggest.
+  TRegexKind = enum
+    regNone,
+    regChar,      # a single literal character (stored in `c`)
+    regSet,       # a character class such as `[a-z]` (stored in `s`)
+    regConc,
+    regAlt,
+    regStar,
+    regPlus,
+    regMN,
+    regNewline,
+    regAny        # `.`; assigned by compileAtom, appended last so the
+                  # ordinals of the other kinds stay unchanged
+
+  # A node of the compiled expression; the fields that exist depend on `kind`.
+  TRegex = object of TObject
+    case kind: TRegexKind
+    of regChar: c: char
+    of regSet: s: ref set[char]
+    else: a, b: PRegEx   # presumably the sub-expressions of the node
+
+  PRegEx* = ref TRegEx
+
+  TRegExFlag* = enum   ## Flags concerning the semantics of regular expressions
+    reCaseInsensitive, ## case insensitive match 
+    reStyleInsensitive ## style insensitive match
+
+  TRegExFlags* = set[TRegExFlag]
+    ## Flags concerning the semantics of regular expressions
+    
+proc raiseRegex(msg: string) {.noreturn.} = 
+  ## Raises an exception whose message is `msg`. Used by the parsing procs
+  ## in this file to report a malformed regular expression; never returns.
+  var err: ref Exception
+  new(err)
+  err.msg = msg
+  raise err
+
+# Forward declaration: compileAtom calls compileAux for parenthesised groups
+# before the body of compileAux appears further down in this file.
+proc compileAux(i: int, s: string, r: PRegEx): int
+    
+proc compileBackslash(i: int, s: string, r: PRegEx): int = 
+  ## Handles a backslash escape. The index is advanced by one before the
+  ## character `s[i]` is inspected, so `i` is presumably expected to be the
+  ## index of the backslash itself. Returns the index following the
+  ## inspected character.
+  ##
+  ## NOTE(review): unfinished -- the letter and digit branches have no body
+  ## yet; only the fallback branch is implemented.
+  var i = i
+  inc(i) # move on to the character after position `i`
+  case s[i]
+  of 'A'..'Z': 
+  of 'a'..'z':
+  of '0':
+  of '1'..'9': 
+  
+  else:
+    # any other character after the backslash is stored as a literal
+    r.kind = regChar
+    r.c = s[i]
+  inc(i)
+  result = i
+
+proc compileAtom(i: int, s: string, r: PRegEx): int = 
+  ## Compiles the atom starting at `s[i]` into the node `r` and returns the
+  ## index of the first character after it. An atom is a character class
+  ## (`[...]`), a backslash escape, `.`, a parenthesised group or a single
+  ## literal character. At the end of the input (`'\0'`) `r` is left
+  ## untouched and `i` is returned unchanged.
+  ##
+  ## Raises via `raiseRegex` if a character class or a group is not closed,
+  ## or if an escape inside a character class does not yield a literal
+  ## character.
+  var i = i
+  case s[i]
+  of '[':
+    inc(i)
+    var inverse = s[i] == '^'
+    if inverse: inc(i)
+    r.kind = regSet
+    new(r.s)
+    # NOTE(review): `r.s` is a `ref set[char]`; depending on the compiler
+    # version the set operations below may need an explicit dereference.
+    while true: 
+      case s[i]
+      of '\\':
+        # Compile the escape into a scratch node: compileBackslash assigns
+        # `kind`, which would otherwise turn `r` from a set node into a
+        # character node and lose the set built so far.
+        var esc: PRegEx
+        new(esc)
+        i = compileBackslash(i, s, esc)
+        if esc.kind == regChar: incl(r.s, esc.c)
+        else: raiseRegex("invalid escape in character class")
+      of ']': 
+        inc(i)
+        break
+      of '\0': 
+        raiseRegex("']' expected")
+      elif s[i+1] == '-':
+        var x = s[i]
+        inc(i, 2)
+        var y = s[i]
+        # a range cut off by the end of the pattern must not advance `i`
+        # past the terminating '\0'
+        if y == '\0': raiseRegex("']' expected")
+        inc(i)
+        r.s = r.s + {x..y}
+      else:
+        incl(r.s, s[i])
+        inc(i)
+    if inverse:
+      r.s = {'\0'..'\255'} - r.s
+  of '\\':
+    # compileBackslash advances past the backslash itself, so `i` must
+    # still point at the backslash here (no extra inc)
+    i = compileBackslash(i, s, r)
+  of '.':
+    # NOTE(review): requires `regAny` to be a member of TRegexKind
+    r.kind = regAny
+    inc(i)
+  of '(': 
+    inc(i)
+    i = compileAux(i, s, r)
+    if s[i] == ')': inc(i)
+    else: raiseRegex("')' expected")
+  of '\0': nil # do nothing
+  else:
+    r.kind = regChar
+    r.c = s[i]
+    inc(i)
+  result = i
+    
+proc compilePostfix(i: int, s: string, r: PRegEx): int = 
+  ## Compiles an atom starting at `s[i]` into `r` and then inspects the
+  ## following character for a postfix operator (`*`, `+` or `?`).
+  ##
+  ## NOTE(review): unfinished -- the operator branches have no body, the
+  ## local `a` is never used and `result` is never assigned.
+  var i = compileAtom(i, s, r)
+  var a: PRegEx
+  case s[i]
+  of '*':
+  of '+':
+  of '?':
+  else: nil
+
+proc compileAux(i: int, s: string, r: PRegEx): int = 
+  ## Compiles the expression starting at `s[i]` into `r` and returns the
+  ## index at which compilation stopped.
+  ##
+  ## NOTE(review): unfinished -- only a single atom is compiled; the loop
+  ## that is presumably meant to consume the rest of the input has no body.
+  var i = i
+  i = compileAtom(i, s, r)
+  
+  while s[i] != '\0':
+    
+  result = i
+    
+proc compile*(regex: string, flags: TRegExFlags = {}): PRegEx = 
+  ## Compiles the string `regex` that represents a regular expression into 
+  ## an internal data structure that can be used for matching.
+  ##
+  ## Raises an exception (via `raiseRegEx`) if parsing stops before the end
+  ## of `regex`.
+  ##
+  ## NOTE(review): `flags` is accepted but not used yet.
+  new(result)
+  var i = compileAux(0, regex, result)
+  if i < len(regex):
+    # The parser stopped before the end of the input, so at least one
+    # character was left unconsumed. (Comparing against len(regex)-1 would
+    # let a single stray trailing character slip through.)
+    raiseRegEx("invalid regular expression")