diff options
author | bptato <nincsnevem662@gmail.com> | 2024-06-03 20:42:16 +0200 |
---|---|---|
committer | bptato <nincsnevem662@gmail.com> | 2024-06-03 21:15:44 +0200 |
commit | 3aa8f1e0694d1606c3f3795f8b83e8a82caacd3e (patch) | |
tree | 9708d4599360116a96e4aa7f983eea387e8437c6 /src/js/jsregex.nim | |
parent | 3e12a95ab34e120fb958ba0eeebaada5def7cd11 (diff) | |
download | chawan-3aa8f1e0694d1606c3f3795f8b83e8a82caacd3e.tar.gz |
Move JS wrapper into Monoucha
Operation "modularize Chawan somewhat" part 3
Diffstat (limited to 'src/js/jsregex.nim')
-rw-r--r-- | src/js/jsregex.nim | 150 |
1 files changed, 0 insertions, 150 deletions
# src/js/jsregex.nim (the module removed by this commit)
# Interface for QuickJS libregexp.
import std/unicode

import bindings/libregexp
import types/opt
import utils/twtstr

export LREFlags

type
  Regex* = object
    ## A compiled regular expression (libregexp bytecode).
    bytecode: seq[uint8]
    when defined(debug):
      buf: string # pattern source, kept only in debug builds

  RegexCapture* = tuple # start, end
    s, e: int

  RegexResult* = object
    success*: bool ## true if at least one match was found
    captures*: seq[seq[RegexCapture]] ## one capture list per match

when defined(debug):
  func `$`*(regex: Regex): string =
    ## Debug builds only: the pattern the regex was compiled from.
    regex.buf

# this is hardcoded into quickjs, so we must override it here.
proc lre_realloc(opaque, p: pointer; size: csize_t): pointer {.exportc.} =
  return realloc(p, size)

proc compileRegex*(buf: string; flags: LREFlags = {}): Result[Regex, string] =
  ## Compile `buf` with `flags`. Returns the compiled regex, or the error
  ## message written by lre_compile on failure.
  var errorMsg = newString(64)
  var plen: cint
  let bytecode = lre_compile(addr plen, cstring(errorMsg), cint(errorMsg.len),
    cstring(buf), csize_t(buf.len), flags.toCInt, nil)
  if bytecode == nil:
    return err(errorMsg.until('\0')) # Failed to compile.
  assert plen > 0
  # Copy the bytecode into a seq so that the Regex owns it, then release the
  # buffer handed back by lre_compile.
  var bcseq = newSeqUninitialized[uint8](plen)
  copyMem(addr bcseq[0], bytecode, plen)
  dealloc(bytecode)
  var regex = Regex(bytecode: bcseq)
  when defined(debug):
    regex.buf = buf
  return ok(regex)

func countBackslashes(buf: string; i: int): int =
  ## Count the run of consecutive backslashes that ends at index `i`, scanning
  ## towards the start of `buf`. A negative `i` yields 0.
  var n = 0
  for k in countdown(i, 0):
    if buf[k] != '\\':
      break
    inc n
  return n

# ^abcd -> ^abcd
# efgh$ -> efgh$
# ^ijkl$ -> ^ijkl$
# mnop -> ^mnop$
proc compileMatchRegex*(buf: string): Result[Regex, string] =
  ## Compile `buf` for matching: a pattern that neither starts with `^` nor
  ## ends with an unescaped `$` is wrapped as `^buf$`; any other pattern
  ## (including the empty one) is compiled unchanged.
  ## A pattern ending in an unpaired backslash yields err("unexpected end").
  if buf.len == 0:
    return compileRegex(buf)
  if buf[0] == '^':
    return compileRegex(buf)
  if buf[^1] == '$':
    # Check whether the final dollar sign is escaped.
    if buf.len == 1 or buf[^2] != '\\':
      return compileRegex(buf)
    let j = buf.countBackslashes(buf.high - 2)
    if j mod 2 == 1: # odd, because we do not count the last backslash
      return compileRegex(buf)
    # escaped. proceed as if no dollar sign was at the end
  if buf[^1] == '\\':
    # Check if the regex contains an invalid trailing backslash.
    let j = buf.countBackslashes(buf.high - 1)
    if j mod 2 != 1: # odd, because we do not count the last backslash
      return err("unexpected end")
  var buf2 = "^"
  buf2 &= buf
  buf2 &= "$"
  return compileRegex(buf2)

proc compileSearchRegex*(str: string; defaultFlags: LREFlags):
    Result[Regex, string] =
  ## Compile `str` as a search pattern. LRE_FLAG_GLOBAL is always added to
  ## the flags.
  # Emulate vim's \c/\C: override defaultFlags if one is found, then remove it
  # from str.
  # Also, replace \< and \> with \b as (a bit sloppy) vi emulation.
  var flags = defaultFlags
  var s = newStringOfCap(str.len)
  var quot = false # true if the previous char was an unconsumed backslash
  for c in str:
    if quot:
      quot = false
      case c
      of 'c': flags.incl(LRE_FLAG_IGNORECASE)
      of 'C': flags.excl(LRE_FLAG_IGNORECASE)
      of '<', '>': s &= "\\b"
      else: s &= '\\' & c
    elif c == '\\':
      quot = true
    else:
      s &= c
  if quot:
    # Keep a trailing backslash in the output.
    s &= '\\'
  flags.incl(LRE_FLAG_GLOBAL) # for easy backwards matching
  return compileRegex(s, flags)

proc exec*(regex: Regex; str: string; start = 0; length = -1; nocaps = false):
    RegexResult =
  ## Execute `regex` on `str`, starting at byte offset `start`. `length` is
  ## passed to lre_exec as the input length; -1 means `str.len`.
  ##
  ## `success` is set if any match is found. Unless `nocaps` is set (or the
  ## regex has no captures), one list of captures is appended per match, as
  ## byte offsets relative to the start of `str`. If the regex was compiled
  ## with LRE_FLAG_GLOBAL, matching is repeated after each match until the end
  ## of the searched window is reached or no further match is found.
  let length = if length == -1:
    str.len
  else:
    length
  assert start in 0 .. length
  let bytecode = unsafeAddr regex.bytecode[0]
  let captureCount = lre_get_capture_count(bytecode)
  var capture: ptr UncheckedArray[int] = nil
  if captureCount > 0:
    # Two pointer-sized slots (start, end) per capture.
    let size = sizeof(ptr uint8) * captureCount * 2
    capture = cast[ptr UncheckedArray[int]](alloc0(size))
  let cstr = cstring(str)
  let flags = lre_get_flags(bytecode).toLREFlags
  # Never continue past the window given to lre_exec; the previous check
  # against str.len alone allowed further calls with start > length.
  let limit = min(length, str.len)
  var start = start
  while true:
    # NOTE(review): the literal 3 is presumably the character buffer type
    # expected by this libregexp binding -- confirm.
    let ret = lre_exec(cast[ptr ptr uint8](capture), bytecode,
      cast[ptr uint8](cstr), cint(start), cint(length), cint(3), nil)
    if ret != 1: #TODO error handling? (-1)
      break
    result.success = true
    if captureCount == 0 or nocaps:
      break
    var caps: seq[RegexCapture] = @[]
    let cstrAddress = cast[int](cstr)
    let ps = start
    # capture[1] is the end of the whole match; resume from there.
    start = capture[1] - cstrAddress
    for i in 0 ..< captureCount:
      # Captures are stored as addresses; convert them to offsets into str.
      # NOTE(review): a group left unset by lre_exec would presumably yield a
      # negative offset here -- confirm how callers treat that.
      let s = capture[i * 2] - cstrAddress
      let e = capture[i * 2 + 1] - cstrAddress
      caps.add((s, e))
    result.captures.add(caps)
    if LRE_FLAG_GLOBAL notin flags:
      break
    if start >= limit:
      break
    if ps == start:
      # Empty match at the search position: step over one rune so that the
      # loop makes progress.
      start += runeLenAt(str, start)
  if captureCount > 0:
    dealloc(capture)

proc match*(regex: Regex; str: string; start = 0; length = str.len): bool =
  ## Returns true if `regex` matches `str` (see `exec`); captures are not
  ## collected.
  return regex.exec(str, start, length, nocaps = true).success