about summary refs log tree commit diff stats
diff options
context:
space:
mode:
author bptato <nincsnevem662@gmail.com> 2023-09-24 01:08:30 +0200
committer bptato <nincsnevem662@gmail.com> 2023-09-24 01:17:42 +0200
commit e0745a3e88d737833c520ae363eaad9d43f1786d (patch)
tree c51024dd741431b3674b3e685d473c52f1c28d9c
parent be9ba2f95826227fd46fcc46c2ca1bb061acd41e (diff)
download chawan-e0745a3e88d737833c520ae363eaad9d43f1786d.tar.gz
regex: copy after compiling
Instead of the broken attempt at making regexes zero-copy (it copied
anyway), copy once and forget about it.

(There have been way too many problems with the destructor approach,
including the latest one where the GC would happily zero out our
regexes if they were in a sequence.

Maybe we can make this work once we have switched to ORC. For now,
it's not worth the trouble.)
-rw-r--r-- src/io/serialize.nim 25
-rw-r--r-- src/js/regex.nim 55
-rw-r--r-- src/local/pager.nim 3
3 files changed, 19 insertions, 64 deletions
diff --git a/src/io/serialize.nim b/src/io/serialize.nim
index 6994b042..d2e83524 100644
--- a/src/io/serialize.nim
+++ b/src/io/serialize.nim
@@ -5,7 +5,6 @@ import sets
 import streams
 import tables
 
-import js/regex
 import loader/request
 import types/blob
 import types/buffersource
@@ -73,10 +72,6 @@ proc swrite*[T, E](stream: Stream, o: Result[T, E])
 proc sread*[T, E](stream: Stream, o: var Result[T, E])
 func slen*[T, E](o: Result[T, E]): int
 
-proc swrite*(stream: Stream, regex: Regex)
-proc sread*(stream: Stream, regex: var Regex)
-func slen*(regex: Regex): int
-
 proc swrite*(stream: Stream, source: BufferSource)
 proc sread*(stream: Stream, source: var BufferSource)
 func slen*(source: BufferSource): int
@@ -389,26 +384,6 @@ func slen*[T, E](o: Result[T, E]): int =
     when not (E is void):
       result += slen(o.error)
 
-proc swrite*(stream: Stream, regex: Regex) =
-  stream.swrite(regex.plen)
-  stream.writeData(regex.bytecode, regex.plen)
-  stream.swrite(regex.buf)
-
-proc sread*(stream: Stream, regex: var Regex) =
-  assert regex.bytecode == nil
-  stream.sread(regex.plen)
-  regex.bytecode = cast[ptr uint8](alloc(regex.plen))
-  regex.clone = true
-  let l = stream.readData(regex.bytecode, regex.plen)
-  stream.sread(regex.buf)
-  if l != regex.plen:
-    `=destroy`(regex)
-
-func slen*(regex: Regex): int =
-  result += slen(regex.plen)
-  result += regex.plen
-  result += slen(regex.buf)
-
 proc swrite*(stream: Stream, source: BufferSource) =
   stream.swrite(source.t)
   case source.t
diff --git a/src/js/regex.nim b/src/js/regex.nim
index caa941a2..d73e6e2b 100644
--- a/src/js/regex.nim
+++ b/src/js/regex.nim
@@ -16,10 +16,8 @@ export
 
 type
   Regex* = object
-    bytecode*: ptr uint8
-    plen*: cint
-    clone*: bool
-    buf*: string
+    bytecode: seq[uint8]
+    buf: string
 
   RegexResult* = object
     success*: bool
@@ -33,47 +31,27 @@ type
 var dummyRuntime = JS_NewRuntime()
 var dummyContext = JS_NewContextRaw(dummyRuntime)
 
-when NimMajor >= 2:
-  proc `=destroy`*(regex: Regex) =
-    if regex.bytecode != nil:
-      if regex.clone:
-        dealloc(regex.bytecode)
-      else:
-        dummyRuntime.js_free_rt(regex.bytecode)
-else:
-  proc `=destroy`*(regex: var Regex) =
-    if regex.bytecode != nil:
-      if regex.clone:
-        dealloc(regex.bytecode)
-      else:
-        dummyRuntime.js_free_rt(regex.bytecode)
-      regex.bytecode = nil
-
-proc `=copy`*(dest: var Regex, source: Regex) =
-  if dest.bytecode != source.bytecode:
-    `=destroy`(dest)
-    wasMoved(dest)
-    dest.bytecode = cast[ptr uint8](alloc(source.plen))
-    copyMem(dest.bytecode, source.bytecode, source.plen)
-    dest.clone = true
-    dest.buf = source.buf
-    dest.plen = source.plen
-
 func `$`*(regex: Regex): string =
   regex.buf
 
 proc compileRegex*(buf: string, flags: int): Result[Regex, string] =
-  var regex: Regex
   var error_msg_size = 64
   var error_msg = newString(error_msg_size)
   prepareMutation(error_msg)
-  let bytecode = lre_compile(addr regex.plen, cstring(error_msg),
+  var plen: cint
+  let bytecode = lre_compile(addr plen, cstring(error_msg),
     cint(error_msg_size), cstring(buf), csize_t(buf.len), cint(flags),
     dummyContext)
   if bytecode == nil:
     return err(error_msg.until('\0')) # Failed to compile.
-  regex.buf = buf
-  regex.bytecode = bytecode
+  assert plen > 0
+  var bcseq = newSeqUninitialized[uint8](plen)
+  copyMem(addr bcseq[0], bytecode, plen)
+  dummyRuntime.js_free_rt(bytecode)
+  let regex = Regex(
+    buf: buf,
+    bytecode: bcseq
+  )
   return ok(regex)
 
 func countBackslashes(buf: string, i: int): int =
@@ -147,18 +125,19 @@ proc exec*(regex: Regex, str: string, start = 0, length = -1, nocaps = false): R
     str.len
   else:
     length
-  assert 0 <= start and start <= length, "Start: " & $start & ", length: " & $length & " str: " & $str
+  assert 0 <= start and start <= length
 
-  let captureCount = lre_get_capture_count(regex.bytecode)
+  let bytecode = unsafeAddr regex.bytecode[0]
+  let captureCount = lre_get_capture_count(bytecode)
   var capture: ptr UncheckedArray[int] = nil
   if captureCount > 0:
     let size = sizeof(ptr uint8) * captureCount * 2
     capture = cast[ptr UncheckedArray[int]](alloc0(size))
   var cstr = cstring(str)
-  let flags = lre_get_flags(regex.bytecode)
+  let flags = lre_get_flags(bytecode)
   var start = start
   while true:
-    let ret = lre_exec(cast[ptr ptr uint8](capture), regex.bytecode,
+    let ret = lre_exec(cast[ptr ptr uint8](capture), bytecode,
       cast[ptr uint8](cstr), cint(start), cint(length), cint(0), dummyContext)
     if ret != 1: #TODO error handling? (-1)
       break
diff --git a/src/local/pager.nim b/src/local/pager.nim
index d6845636..d4853d31 100644
--- a/src/local/pager.nim
+++ b/src/local/pager.nim
@@ -675,7 +675,8 @@ proc omniRewrite(pager: Pager, s: string): string =
       if sub.isSome:
         return sub.get
       else:
-        pager.alert("Error in substitution of rule " & rule.match.buf & " for " & s)
+        let buf = $rule.match
+        pager.alert("Error in substitution of rule " & buf & " for " & s)
   return s
 
 # When the user has passed a partial URL as an argument, they might've meant