# Interface for QuickJS libregexp.

import std/unicode

import bindings/libregexp
import types/opt
import utils/twtstr

export LREFlags

type
  Regex* = object
    ## A compiled regular expression: the libregexp bytecode plus the pattern
    ## source it was compiled from.
    bytecode: seq[uint8]
    buf: string

  RegexCapture* = tuple # start, end, index
    ## Byte offsets of one capture group inside the searched string.
    ## `s` is the start, `e` the (exclusive) end and `i` the group index;
    ## group 0 is the whole match. `exec` stores `s == e == -1` for a group
    ## that did not take part in the match.
    s, e: int
    i: int32

  RegexResult* = object
    ## Outcome of `exec`. `captures` is a flat list: every successful match
    ## contributes one entry per capture group, in group order.
    success*: bool
    captures*: seq[RegexCapture]

  RegexReplace* = object
    regex: Regex
    rule: string
    global: bool

func `$`*(regex: Regex): string =
  ## Returns the pattern source the regex was compiled from.
  regex.buf

# this is hardcoded into quickjs, so we must override it here.
proc lre_realloc(opaque, p: pointer; size: csize_t): pointer {.exportc.} =
  # `opaque` is unused; the allocation is forwarded to Nim's allocator, which
  # is why compileRegex can release the bytecode with `dealloc`.
  return realloc(p, size)

proc compileRegex*(buf: string; flags: LREFlags = {}): Result[Regex, string] =
  ## Compiles `buf` with the given flags.
  ##
  ## Returns the compiled regex, or the NUL-terminated error message written
  ## by `lre_compile` when compilation fails.
  var errorMsg = newString(64)
  var plen: cint
  let bytecode = lre_compile(addr plen, cstring(errorMsg), cint(errorMsg.len),
    cstring(buf), csize_t(buf.len), flags.toCInt, nil)
  if bytecode == nil:
    return err(errorMsg.until('\0')) # Failed to compile.
  assert plen > 0
  # Copy the bytecode into a managed seq so the raw buffer can be freed here.
  var bcseq = newSeqUninitialized[uint8](plen)
  copyMem(addr bcseq[0], bytecode, plen)
  dealloc(bytecode)
  let regex = Regex(
    buf: buf,
    bytecode: bcseq
  )
  return ok(regex)

func countBackslashes(buf: string, i: int): int =
  ## Counts the run of consecutive backslashes that ends at index `i`,
  ## scanning towards the start of `buf`. Returns 0 when `i` is negative.
  var j = 0
  for i in countdown(i, 0):
    if buf[i] != '\\':
      break
    inc j
  return j

# ^abcd -> ^abcd
# efgh$ -> efgh$
# ^ijkl$ -> ^ijkl$
# mnop -> ^mnop$
proc compileMatchRegex*(buf: string): Result[Regex, string] =
  ## Compiles `buf` as a whole-string matcher: a pattern that carries neither
  ## a leading `^` nor an unescaped trailing `$` is wrapped in `^...$`.
  ## A pattern ending in an unescaped backslash yields `err("unexpected end")`.
  if buf.len == 0:
    return compileRegex(buf)
  if buf[0] == '^':
    return compileRegex(buf)
  if buf[^1] == '$':
    # Check whether the final dollar sign is escaped.
    if buf.len == 1 or buf[^2] != '\\':
      return compileRegex(buf)
    let j = buf.countBackslashes(buf.high - 2)
    if j mod 2 == 1: # odd, because we do not count the last backslash
      return compileRegex(buf)
    # escaped. proceed as if no dollar sign was at the end
  if buf[^1] == '\\':
    # Check if the regex contains an invalid trailing backslash.
    let j = buf.countBackslashes(buf.high - 1)
    if j mod 2 != 1: # odd, because we do not count the last backslash
      return err("unexpected end")
  var buf2 = "^"
  buf2 &= buf
  buf2 &= "$"
  return compileRegex(buf2)

proc compileSearchRegex*(str: string; defaultFlags: LREFlags):
    Result[Regex, string] =
  ## Compiles a search pattern written with a few vi-style escapes.
  # Emulate vim's \c/\C: override defaultFlags if one is found, then remove it
  # from str.
  # Also, replace \< and \> with \b as (a bit sloppy) vi emulation.
  var flags = defaultFlags
  var s = newStringOfCap(str.len)
  var quot = false # true while the previous character was a pending backslash
  for c in str:
    if quot:
      quot = false
      case c
      of 'c': flags.incl(LRE_FLAG_IGNORECASE)
      of 'C': flags.excl(LRE_FLAG_IGNORECASE)
      of '<', '>': s &= "\\b"
      else: s &= '\\' & c
    elif c == '\\':
      quot = true
    else:
      s &= c
  if quot:
    # Keep a dangling trailing backslash in the output unchanged.
    s &= '\\'
  flags.incl(LRE_FLAG_GLOBAL) # for easy backwards matching
  return compileRegex(s, flags)

proc exec*(regex: Regex; str: string; start = 0; length = -1; nocaps = false):
    RegexResult =
  ## Runs `regex` against `str`.
  ##
  ## * `start`: byte offset at which scanning begins.
  ## * `length`: end of the scanned range in bytes; -1 means `str.len`.
  ## * `nocaps`: when true only `success` is filled in and scanning stops at
  ##   the first match.
  ##
  ## Without LRE_FLAG_GLOBAL only the first match is recorded. With it, every
  ## match that begins before the end of the scanned range is recorded.
  ## Capture offsets are byte offsets into `str`; a group that did not take
  ## part in a match is stored as `(-1, -1, i)`.
  let length = if length == -1:
    str.len
  else:
    length
  assert start in 0 .. length
  let bytecode = unsafeAddr regex.bytecode[0]
  let captureCount = lre_get_capture_count(bytecode)
  var capture: ptr UncheckedArray[int] = nil
  if captureCount > 0:
    # Two pointer-sized slots (start and end) per capture group.
    let size = sizeof(ptr uint8) * captureCount * 2
    capture = cast[ptr UncheckedArray[int]](alloc0(size))
  let cstr = cstring(str)
  let flags = lre_get_flags(bytecode).toLREFlags
  # Never scan past the effective length, nor past the string itself.
  let stop = min(length, str.len)
  var start = start
  while true:
    # NOTE(review): the cint(3) argument presumably selects the buffer
    # encoding used by the bundled libregexp -- confirm against the bindings.
    let ret = lre_exec(cast[ptr ptr uint8](capture), bytecode,
      cast[ptr uint8](cstr), cint(start), cint(length), cint(3), nil)
    if ret != 1: #TODO error handling? (-1)
      break
    result.success = true
    if captureCount == 0 or nocaps:
      break
    # lre_exec reports captures as pointers into cstr; convert to offsets.
    let cstrAddress = cast[int](cstr)
    let matchStart = capture[0] - cstrAddress
    let matchEnd = capture[1] - cstrAddress
    for i in 0 ..< captureCount:
      let sp = capture[i * 2]
      let ep = capture[i * 2 + 1]
      var s = -1
      var e = -1
      if sp != 0 and ep != 0:
        s = sp - cstrAddress
        e = ep - cstrAddress
      # else: the group did not participate; its slots are still NULL, so
      # subtracting the base address would yield a meaningless offset.
      result.captures.add((s, e, i))
    if LRE_FLAG_GLOBAL notin flags:
      break
    start = matchEnd
    if start >= stop:
      break
    if matchStart == matchEnd:
      # Empty match: step over one code point, otherwise the next iteration
      # would find (and record) the very same empty match again.
      start += runeLenAt(str, start)
  if captureCount > 0:
    dealloc(capture)

proc match*(regex: Regex; str: string; start = 0; length = str.len): bool =
  ## Returns true if `regex` matches somewhere in `str[start ..< length]`.
  ## No capture information is collected.
  return regex.exec(str, start, length, nocaps = true).success