diff options
author | bptato <nincsnevem662@gmail.com> | 2023-09-24 01:08:30 +0200 |
---|---|---|
committer | bptato <nincsnevem662@gmail.com> | 2023-09-24 01:17:42 +0200 |
commit | e0745a3e88d737833c520ae363eaad9d43f1786d (patch) | |
tree | c51024dd741431b3674b3e685d473c52f1c28d9c | |
parent | be9ba2f95826227fd46fcc46c2ca1bb061acd41e (diff) | |
download | chawan-e0745a3e88d737833c520ae363eaad9d43f1786d.tar.gz |
regex: copy after compiling
Instead of the broken attempt at making regexes zero-copy (it copied anyway), copy once and forget about it. (There have been way too many problems with the destructor approach, including the latest one where the GC would happily zero out our regexes if they were in a sequence. Maybe we can make this work once we have switched to ORC. For now, it's not worth the trouble.)
-rw-r--r-- | src/io/serialize.nim | 25 | ||||
-rw-r--r-- | src/js/regex.nim | 55 | ||||
-rw-r--r-- | src/local/pager.nim | 3 |
3 files changed, 19 insertions, 64 deletions
diff --git a/src/io/serialize.nim b/src/io/serialize.nim index 6994b042..d2e83524 100644 --- a/src/io/serialize.nim +++ b/src/io/serialize.nim @@ -5,7 +5,6 @@ import sets import streams import tables -import js/regex import loader/request import types/blob import types/buffersource @@ -73,10 +72,6 @@ proc swrite*[T, E](stream: Stream, o: Result[T, E]) proc sread*[T, E](stream: Stream, o: var Result[T, E]) func slen*[T, E](o: Result[T, E]): int -proc swrite*(stream: Stream, regex: Regex) -proc sread*(stream: Stream, regex: var Regex) -func slen*(regex: Regex): int - proc swrite*(stream: Stream, source: BufferSource) proc sread*(stream: Stream, source: var BufferSource) func slen*(source: BufferSource): int @@ -389,26 +384,6 @@ func slen*[T, E](o: Result[T, E]): int = when not (E is void): result += slen(o.error) -proc swrite*(stream: Stream, regex: Regex) = - stream.swrite(regex.plen) - stream.writeData(regex.bytecode, regex.plen) - stream.swrite(regex.buf) - -proc sread*(stream: Stream, regex: var Regex) = - assert regex.bytecode == nil - stream.sread(regex.plen) - regex.bytecode = cast[ptr uint8](alloc(regex.plen)) - regex.clone = true - let l = stream.readData(regex.bytecode, regex.plen) - stream.sread(regex.buf) - if l != regex.plen: - `=destroy`(regex) - -func slen*(regex: Regex): int = - result += slen(regex.plen) - result += regex.plen - result += slen(regex.buf) - proc swrite*(stream: Stream, source: BufferSource) = stream.swrite(source.t) case source.t diff --git a/src/js/regex.nim b/src/js/regex.nim index caa941a2..d73e6e2b 100644 --- a/src/js/regex.nim +++ b/src/js/regex.nim @@ -16,10 +16,8 @@ export type Regex* = object - bytecode*: ptr uint8 - plen*: cint - clone*: bool - buf*: string + bytecode: seq[uint8] + buf: string RegexResult* = object success*: bool @@ -33,47 +31,27 @@ type var dummyRuntime = JS_NewRuntime() var dummyContext = JS_NewContextRaw(dummyRuntime) -when NimMajor >= 2: - proc `=destroy`*(regex: Regex) = - if regex.bytecode != nil: - if 
regex.clone: - dealloc(regex.bytecode) - else: - dummyRuntime.js_free_rt(regex.bytecode) -else: - proc `=destroy`*(regex: var Regex) = - if regex.bytecode != nil: - if regex.clone: - dealloc(regex.bytecode) - else: - dummyRuntime.js_free_rt(regex.bytecode) - regex.bytecode = nil - -proc `=copy`*(dest: var Regex, source: Regex) = - if dest.bytecode != source.bytecode: - `=destroy`(dest) - wasMoved(dest) - dest.bytecode = cast[ptr uint8](alloc(source.plen)) - copyMem(dest.bytecode, source.bytecode, source.plen) - dest.clone = true - dest.buf = source.buf - dest.plen = source.plen - func `$`*(regex: Regex): string = regex.buf proc compileRegex*(buf: string, flags: int): Result[Regex, string] = - var regex: Regex var error_msg_size = 64 var error_msg = newString(error_msg_size) prepareMutation(error_msg) - let bytecode = lre_compile(addr regex.plen, cstring(error_msg), + var plen: cint + let bytecode = lre_compile(addr plen, cstring(error_msg), cint(error_msg_size), cstring(buf), csize_t(buf.len), cint(flags), dummyContext) if bytecode == nil: return err(error_msg.until('\0')) # Failed to compile. 
- regex.buf = buf - regex.bytecode = bytecode + assert plen > 0 + var bcseq = newSeqUninitialized[uint8](plen) + copyMem(addr bcseq[0], bytecode, plen) + dummyRuntime.js_free_rt(bytecode) + let regex = Regex( + buf: buf, + bytecode: bcseq + ) return ok(regex) func countBackslashes(buf: string, i: int): int = @@ -147,18 +125,19 @@ proc exec*(regex: Regex, str: string, start = 0, length = -1, nocaps = false): R str.len else: length - assert 0 <= start and start <= length, "Start: " & $start & ", length: " & $length & " str: " & $str + assert 0 <= start and start <= length - let captureCount = lre_get_capture_count(regex.bytecode) + let bytecode = unsafeAddr regex.bytecode[0] + let captureCount = lre_get_capture_count(bytecode) var capture: ptr UncheckedArray[int] = nil if captureCount > 0: let size = sizeof(ptr uint8) * captureCount * 2 capture = cast[ptr UncheckedArray[int]](alloc0(size)) var cstr = cstring(str) - let flags = lre_get_flags(regex.bytecode) + let flags = lre_get_flags(bytecode) var start = start while true: - let ret = lre_exec(cast[ptr ptr uint8](capture), regex.bytecode, + let ret = lre_exec(cast[ptr ptr uint8](capture), bytecode, cast[ptr uint8](cstr), cint(start), cint(length), cint(0), dummyContext) if ret != 1: #TODO error handling? (-1) break diff --git a/src/local/pager.nim b/src/local/pager.nim index d6845636..d4853d31 100644 --- a/src/local/pager.nim +++ b/src/local/pager.nim @@ -675,7 +675,8 @@ proc omniRewrite(pager: Pager, s: string): string = if sub.isSome: return sub.get else: - pager.alert("Error in substitution of rule " & rule.match.buf & " for " & s) + let buf = $rule.match + pager.alert("Error in substitution of rule " & buf & " for " & s) return s # When the user has passed a partial URL as an argument, they might've meant |