# Interface for QuickJS libregexp.

import std/unicode

import bindings/libregexp
import bindings/quickjs
import types/opt
import utils/twtstr

export
  LRE_FLAG_GLOBAL,
  LRE_FLAG_IGNORECASE,
  LRE_FLAG_MULTILINE,
  LRE_FLAG_DOTALL,
  LRE_FLAG_UTF16,
  LRE_FLAG_STICKY

type
  # A compiled regular expression: the libregexp bytecode together with the
  # pattern source it was compiled from (the latter is what `$` returns).
  Regex* = object
    bytecode: seq[uint8]
    buf: string

  # Outcome of `exec`. `captures` holds (start, end) offsets into the
  # subject string, one tuple per capture group per match.
  RegexResult* = object
    success*: bool
    captures*: seq[tuple[s, e: int]] # start, end

  RegexReplace* = object
    regex: Regex
    rule: string
    global: bool

# Runtime/context pair created once when the module is initialized. The
# context is handed to lre_compile/lre_exec as their opaque argument, and the
# runtime is used to free the bytecode buffer returned by lre_compile.
var dummyRuntime = JS_NewRuntime()
var dummyContext = JS_NewContextRaw(dummyRuntime)

func `$`*(regex: Regex): string =
  ## Returns the pattern source stored in the regex.
  regex.buf

proc compileRegex*(buf: string, flags: int): Result[Regex, string] =
  ## Compiles the pattern `buf` with the given `LRE_FLAG_*` bits.
  ##
  ## Returns the compiled regex on success; on failure returns the message
  ## that lre_compile wrote into the error buffer, cut at the first NUL.
  const errorMsgSize = 64
  var errorMsg = newString(errorMsgSize)
  # lre_compile writes through the cstring view of this buffer.
  prepareMutation(errorMsg)
  var plen: cint
  let bytecode = lre_compile(addr plen, cstring(errorMsg),
    cint(errorMsgSize), cstring(buf), csize_t(buf.len), cint(flags),
    dummyContext)
  if bytecode == nil:
    return err(errorMsg.until('\0')) # Failed to compile.
  assert plen > 0
  # Copy the bytecode into a managed seq so the C buffer can be released
  # right away.
  var bcseq = newSeqUninitialized[uint8](plen)
  copyMem(addr bcseq[0], bytecode, plen)
  dummyRuntime.js_free_rt(bytecode)
  let regex = Regex(
    buf: buf,
    bytecode: bcseq
  )
  return ok(regex)

func countBackslashes(buf: string, i: int): int =
  ## Counts the run of consecutive backslashes in `buf` that ends at index
  ## `i`, scanning towards the start of the string. Yields 0 when `i < 0`.
  for k in countdown(i, 0):
    if buf[k] != '\\':
      break
    inc result

# ^abcd -> ^abcd
# efgh$ -> efgh$
# ^ijkl$ -> ^ijkl$
# mnop -> ^mnop$
proc compileMatchRegex*(buf: string): Result[Regex, string] =
  ## Compiles `buf` as a "match" pattern: a pattern that carries no anchor of
  ## its own (leading `^` or unescaped trailing `$`) is wrapped in `^...$`;
  ## see the table above. A dangling trailing backslash is rejected with
  ## "unexpected end" instead of being allowed to escape the appended `$`.
  if buf.len == 0:
    return compileRegex(buf, 0)
  if buf[0] == '^':
    return compileRegex(buf, 0)
  if buf[^1] == '$':
    # Check whether the final dollar sign is escaped.
    if buf.len == 1 or buf[^2] != '\\':
      return compileRegex(buf, 0)
    let j = buf.countBackslashes(buf.high - 2)
    if j mod 2 == 1: # odd, because we do not count the last backslash
      return compileRegex(buf, 0)
    # escaped. proceed as if no dollar sign was at the end
  if buf[^1] == '\\':
    # Check if the regex contains an invalid trailing backslash.
    let j = buf.countBackslashes(buf.high - 1)
    if j mod 2 != 1: # odd, because we do not count the last backslash
      return err("unexpected end")
  var buf2 = "^"
  buf2 &= buf
  buf2 &= "$"
  return compileRegex(buf2, 0)

proc compileSearchRegex*(str: string): Result[Regex, string] =
  ## Compiles a search pattern of the form `regex` or `regex/<flags>`, where
  ## `<flags>` is any combination of `i`, `m`, `s` and `u`. The global flag
  ## is always set.
  # Parse any applicable flags in regex/<flags>. The last forward slash is
  # dropped when <flags> is empty, and interpreted as a character when the
  # flags are invalid.
  var i = str.high
  var flagsi = -1
  while i >= 0:
    case str[i]
    of '/':
      flagsi = i
      break
    of 'i', 'm', 's', 'u': discard
    else: break # invalid flag
    dec i

  var flags = LRE_FLAG_GLOBAL # for easy backwards matching

  if flagsi == -1:
    return compileRegex(str, flags)

  for i in flagsi..str.high:
    case str[i]
    of '/': discard
    of 'i': flags = flags or LRE_FLAG_IGNORECASE
    of 'm': flags = flags or LRE_FLAG_MULTILINE
    of 's': flags = flags or LRE_FLAG_DOTALL
    of 'u': flags = flags or LRE_FLAG_UTF16
    else: assert false # unreachable: the scan above only accepts these
  return compileRegex(str.substr(0, flagsi - 1), flags)

proc exec*(regex: Regex, str: string, start = 0, length = -1,
    nocaps = false): RegexResult =
  ## Runs `regex` over the first `length` bytes of `str`, starting the search
  ## at byte offset `start`. `length == -1` means `str.len`.
  ##
  ## `success` is set if at least one match was found. Unless `nocaps` is
  ## set, `captures` receives one `(s, e)` byte-offset pair per capture group
  ## (group 0 first); a group that did not take part in the match is reported
  ## as `(-1, -1)`. If the regex was compiled with `LRE_FLAG_GLOBAL`, the
  ## search is repeated from the end of each match and the groups of every
  ## match are appended in order.
  let length =
    if length == -1: str.len
    else: length
  assert 0 <= start and start <= length

  let bytecode = unsafeAddr regex.bytecode[0]
  let groupCount = int(lre_get_capture_count(bytecode))
  # lre_exec fills two pointer-sized slots (start, end) per group. A managed
  # seq replaces the manual alloc0/dealloc pair so the buffer cannot leak.
  var capture = newSeq[int](groupCount * 2)
  var capturePtr: ptr ptr uint8 = nil
  if capture.len > 0:
    capturePtr = cast[ptr ptr uint8](addr capture[0])
  let cstr = cstring(str)
  let cstrAddress = cast[int](cstr)
  let flags = lre_get_flags(bytecode)
  var start = start
  while true:
    # NOTE(review): the literal 3 is the buffer-type argument of lre_exec;
    # presumably it selects UTF-8 input in this libregexp build - confirm
    # against bindings/libregexp.
    let ret = lre_exec(capturePtr, bytecode, cast[ptr uint8](cstr),
      cint(start), cint(length), cint(3), dummyContext)
    if ret != 1: #TODO error handling? (-1)
      break
    result.success = true
    if groupCount == 0 or nocaps:
      break
    let ps = start
    # Slot 1 is the end of the whole match; the next search resumes there.
    start = capture[1] - cstrAddress
    for i in 0 ..< groupCount:
      let sp = capture[i * 2]
      let ep = capture[i * 2 + 1]
      if sp == 0 or ep == 0:
        # A null slot would otherwise turn into a meaningless negative
        # offset; report the group as unmatched instead.
        result.captures.add((-1, -1))
      else:
        result.captures.add((sp - cstrAddress, ep - cstrAddress))
    # Test the masked bit rather than comparing against the literal 1, so
    # the check does not depend on the numeric value of the flag.
    if (flags and LRE_FLAG_GLOBAL) == 0:
      break
    # Stop at the end of the searched region, not the end of the string.
    if start >= length:
      break
    if ps == start:
      # Empty match: step over one rune so the loop makes progress.
      start += runeLenAt(str, start)
      if start > length:
        break

proc match*(regex: Regex, str: string, start = 0, length = str.len): bool =
  ## Reports whether `regex` matches within the first `length` bytes of
  ## `str`, searching from `start`. Capture offsets are not collected.
  regex.exec(str, start, length, nocaps = true).success