diff options
author | bptato <nincsnevem662@gmail.com> | 2023-01-01 16:14:01 +0100 |
---|---|---|
committer | bptato <nincsnevem662@gmail.com> | 2023-01-01 18:31:17 +0100 |
commit | 1a24350ee44859f6b3370ac15e8875a79fa7ae9b (patch) | |
tree | 0c168d725baa44bf1813c1456c9c835c4e1bf776 /src/js | |
parent | 9a22a844c956d80f0d5c80330e898d52acadfd2f (diff) | |
download | chawan-1a24350ee44859f6b3370ac15e8875a79fa7ae9b.tar.gz |
buffer: fix some search bugs & refactor regex stuff
cursorBytes uses twidth now. cursorNextMatch matches the byte *after* the cursor (somewhat more consistently than before). match() no longer counts capture groups. LRE_FLAG_GLOBAL now goes through the entire string.
Diffstat (limited to 'src/js')
-rw-r--r-- | src/js/regex.nim | 54 |
1 files changed, 27 insertions, 27 deletions
diff --git a/src/js/regex.nim b/src/js/regex.nim index 2a205130..eb4fe365 100644 --- a/src/js/regex.nim +++ b/src/js/regex.nim @@ -6,6 +6,7 @@ import unicode import bindings/libregexp import bindings/quickjs import js/javascript +import utils/twtstr export LRE_FLAG_GLOBAL, @@ -158,21 +159,19 @@ proc compileSearchRegex*(str: string): Option[Regex] = else: assert false return compileRegex(str.substr(0, flagsi - 1), flags) -proc exec*(regex: Regex, str: string, start = 0, length = str.len): RegexResult = +proc exec*(regex: Regex, str: string, start = 0, length = -1, nocaps = false): RegexResult = + let length = if length == -1: + str.len + else: + length assert 0 <= start and start <= length, "Start: " & $start & ", length: " & $length & " str: " & $str let captureCount = lre_get_capture_count(regex.bytecode) - var capture: ptr ptr uint8 = nil if captureCount > 0: capture = cast[ptr ptr uint8](alloc0(sizeof(ptr uint8) * captureCount * 2)) - var cstr = cstring(str) - var ascii = true - for c in str: - if c > char(0x80): - ascii = false - break + let ascii = str.isAscii() var ustr: string16 if not ascii: if start != 0 or length != str.len: @@ -180,27 +179,25 @@ proc exec*(regex: Regex, str: string, start = 0, length = str.len): RegexResult else: ustr = toUTF16(str) cstr = cstring(ustr) - - let ret = lre_exec(capture, regex.bytecode, - cast[ptr uint8](cstr), cint(start), - cint(length), cint(not ascii), dummyContext) - - result.success = ret == 1 #TODO error handling? (-1) - - if result.success: - var i = 0 + let flags = lre_get_flags(regex.bytecode) + var start = start + while true: + let ret = lre_exec(capture, regex.bytecode, + cast[ptr uint8](cstr), cint(start), + cint(length), cint(not ascii), dummyContext) + if ret != 1: #TODO error handling? (-1) + break + result.success = true + if captureCount == 0 or nocaps: + break let cstrAddress = cast[int](cstr) + start = cast[ptr int](cast[int](capture) + sizeof(ptr uint8))[] - cstrAddress + var i = 0 while i < captureCount * sizeof(ptr uint8): - let startPointerAddress = cast[int](capture) + i + let s = cast[ptr int](cast[int](capture) + i)[] - cstrAddress i += sizeof(ptr uint8) - let endPointerAddress = cast[int](capture) + i + let e = cast[ptr int](cast[int](capture) + i)[] - cstrAddress i += sizeof(ptr uint8) - let startPointer = cast[ptr ptr uint8](startPointerAddress) - let endPointer = cast[ptr ptr uint8](endPointerAddress) - let startAddress = cast[int](startPointer[]) - let endAddress = cast[int](endPointer[]) - var s = startAddress - cstrAddress - var e = endAddress - cstrAddress if ascii: result.captures.add((s, e)) else: @@ -217,7 +214,10 @@ proc exec*(regex: Regex, str: string, start = 0, length = str.len): RegexResult fastRuneAt(ustr, i, r) e8 += r.size() result.captures.add((s8, e8)) - dealloc(capture) + if (flags and LRE_FLAG_GLOBAL) != 1: + break + if captureCount > 0: + dealloc(capture) proc match*(regex: Regex, str: string, start = 0, length = str.len): bool = - return regex.exec(str, start, length).success + return regex.exec(str, start, length, nocaps = true).success |