diff options
author | Flaviu Tamas <tamasflaviu@gmail.com> | 2015-01-09 19:40:41 -0500 |
---|---|---|
committer | Flaviu Tamas <tamasflaviu@gmail.com> | 2015-01-09 19:40:41 -0500 |
commit | dec2f9efa9dd88d4bce0d42fa25dd758b90fa1a1 (patch) | |
tree | cf74eeebb2b9a0c608fb6eea380b5c399ba32877 /src | |
parent | b36ba634a687c762fa8062ec0c5296b6f19986c1 (diff) | |
download | Nim-dec2f9efa9dd88d4bce0d42fa25dd758b90fa1a1.tar.gz |
Implement regex initialization
Diffstat (limited to 'src')
-rw-r--r-- | src/nre.nim | 97 | ||||
-rw-r--r-- | src/private/util.nim | 7 |
2 files changed, 104 insertions, 0 deletions
# ---- src/nre.nim (new file in this commit) ----
import private.pcre as pcre
import private.util
import tables
import unsigned
from strutils import toLower

# PCRE Options {{{

# Maps every option token understood by `tokenizeOptions` to the PCRE flag it
# enables. One-character keys are short options; the longer lower-case keys
# are the names written between `<` and `>` in an option string.
let Options: Table[string, int] = {
  "8" : pcre.UTF8,
  "9" : pcre.NEVER_UTF,
  "?" : pcre.NO_UTF8_CHECK,
  "A" : pcre.ANCHORED,
  # "C" : pcre.AUTO_CALLOUT, unsupported XXX
  "E" : pcre.DOLLAR_ENDONLY,
  "f" : pcre.FIRSTLINE,
  "i" : pcre.CASELESS,
  "J" : pcre.DUPNAMES,
  "m" : pcre.MULTILINE,
  "N" : pcre.NO_AUTO_CAPTURE,
  "O" : pcre.NO_AUTO_POSSESS,
  "s" : pcre.DOTALL,
  "U" : pcre.UNGREEDY,
  "W" : pcre.UCP,
  "X" : pcre.EXTRA,
  "x" : pcre.EXTENDED,
  "Y" : pcre.NO_START_OPTIMIZE,

  "any" : pcre.NEWLINE_ANY,
  "anycrlf" : pcre.NEWLINE_ANYCRLF,
  "cr" : pcre.NEWLINE_CR,
  "crlf" : pcre.NEWLINE_CRLF,
  "lf" : pcre.NEWLINE_LF,
  "bsr_anycrlf" : pcre.BSR_ANYCRLF,
  "bsr_unicode" : pcre.BSR_UNICODE,
  "js" : pcre.JAVASCRIPT_COMPAT,
}.toTable

proc tokenizeOptions(opts: string): tuple[flags: int, study: bool] =
  ## Translates an option string into PCRE compile flags.
  ##
  ## Outside angle brackets every character is looked up in `Options` as a
  ## one-character token and OR-ed into `flags`. The exception is `S`, which
  ## is not a flag: it only sets `study` to true. Text between `<` and `>`
  ## is lower-cased and looked up as a single long token, e.g. `<anycrlf>`.
  ##
  ## Raises `KeyError` (from `fget`) when a token is not a key of `Options`,
  ## and `ValueError` when a `<` is never closed by a matching `>`.
  result = (0, false)

  # nil while scanning short options; non-nil while inside `<...>`
  var longOpt: string = nil
  for c in opts:
    # Handle long options {{{
    if c == '<':
      longOpt = ""
      continue

    if longOpt != nil:
      if c == '>':
        result.flags = result.flags or Options.fget(longOpt)
        longOpt = nil
      else:
        longOpt.add(c.toLower)
      continue
    # }}}

    if c == 'S':  # handle study
      result.study = true
      continue

    result.flags = result.flags or Options.fget($c)

  if longOpt != nil:
    # A dangling `<name` used to be dropped without notice, so the requested
    # option was never applied. Report it instead.
    raise newException(ValueError,
                       "Unterminated long option: <" & longOpt)

# }}}

type
  Regex* = ref object
    ## A compiled pattern together with the optional study data.
    pcreObj: ptr pcre.Pcre          # set from pcre.compile
    pcreExtra: ptr pcre.ExtraData   # set from pcre.study; may stay nil

  SyntaxError* = ref object of Exception
    ## Raised by `initRegex` when the pattern fails to compile.
    pos*: int  ## the location of the syntax error in bytes
    pattern*: string  ## the pattern that caused the problem

  StudyError* = ref object of Exception
    ## Raised by `initRegex` when studying the compiled pattern fails.

proc initRegex*(pattern: string, options = "Sx"): Regex =
  ## Compiles `pattern` into a new `Regex`.
  ##
  ## `options` is parsed by `tokenizeOptions`. With the default `"Sx"` the
  ## pattern is compiled with `pcre.EXTENDED` and then studied.
  ##
  ## Raises `SyntaxError` when compilation fails, `StudyError` when studying
  ## reports an error, and whatever `tokenizeOptions` raises for a malformed
  ## option string.
  new result
  # NOTE(review): nothing in this file releases pcreObj/pcreExtra; confirm
  # whether a finalizer or explicit free is meant to be added.

  var errorMsg: cstring
  var errOffset: cint

  let opts = tokenizeOptions(options)

  result.pcreObj = pcre.compile(cstring(pattern),
                                # better hope int is at least 4 bytes..
                                cint(opts.flags), addr errorMsg,
                                addr errOffset, nil)
  if result.pcreObj == nil:
    # failed to compile
    raise SyntaxError(msg: $errorMsg, pos: errOffset, pattern: pattern)

  if opts.study:
    # XXX investigate JIT
    errorMsg = nil
    result.pcreExtra = pcre.study(result.pcreObj, 0x0, addr errorMsg)
    # pcre_study may legitimately return NULL when it has nothing useful to
    # add; failure is signalled only through a non-NULL error message, so
    # test the message rather than the returned pointer.
    if not errorMsg.isNil:
      raise StudyError(msg: $errorMsg)

# ---- src/private/util.nim (new file in this commit) ----
import tables

proc fget*[K, V](self: Table[K, V], key: K): V =
  ## Returns the value stored under `key`.
  ##
  ## Raises `KeyError` naming the key when it is not present in `self`.
  if self.hasKey(key):
    return self[key]
  else:
    raise newException(KeyError, "Key does not exist in table: " & $key)