diff options
author | bptato <nincsnevem662@gmail.com> | 2022-07-22 19:52:31 +0200 |
---|---|---|
committer | bptato <nincsnevem662@gmail.com> | 2022-07-22 19:52:31 +0200 |
commit | 6f7bcc54ab03bc31be309352c73fd8d8153f9c91 (patch) | |
tree | dc4fd8a80ccc8a5a8f7c5a567fcbf80c3e66eccb /src/bindings | |
parent | c69a8ab7576e2053afc5dfcee5c7152a07c31230 (diff) | |
download | chawan-6f7bcc54ab03bc31be309352c73fd8d8153f9c91.tar.gz |
Add search function
Uses libregexp from QuickJS. Incremental search is quite hacky for now, but overall it seems to work OK.
Diffstat (limited to 'src/bindings')
-rw-r--r-- | src/bindings/libregexp.h | 92 | ||||
-rw-r--r-- | src/bindings/libregexp.nim | 19 | ||||
-rw-r--r-- | src/bindings/libunicode.h | 124 | ||||
-rw-r--r-- | src/bindings/quickjs.nim | 3 |
4 files changed, 238 insertions, 0 deletions
diff --git a/src/bindings/libregexp.h b/src/bindings/libregexp.h new file mode 100644 index 00000000..9aedb7e9 --- /dev/null +++ b/src/bindings/libregexp.h @@ -0,0 +1,92 @@ +/* + * Regular Expression Engine + * + * Copyright (c) 2017-2018 Fabrice Bellard + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ +#ifndef LIBREGEXP_H +#define LIBREGEXP_H + +#include <stddef.h> + +#include "libunicode.h" + +#define LRE_BOOL int /* for documentation purposes */ + +#define LRE_FLAG_GLOBAL (1 << 0) +#define LRE_FLAG_IGNORECASE (1 << 1) +#define LRE_FLAG_MULTILINE (1 << 2) +#define LRE_FLAG_DOTALL (1 << 3) +#define LRE_FLAG_UTF16 (1 << 4) +#define LRE_FLAG_STICKY (1 << 5) + +#define LRE_FLAG_NAMED_GROUPS (1 << 7) /* named groups are present in the regexp */ + +uint8_t *lre_compile(int *plen, char *error_msg, int error_msg_size, + const char *buf, size_t buf_len, int re_flags, + void *opaque); +int lre_get_capture_count(const uint8_t *bc_buf); +int lre_get_flags(const uint8_t *bc_buf); +const char *lre_get_groupnames(const uint8_t *bc_buf); +int lre_exec(uint8_t **capture, + const uint8_t *bc_buf, const uint8_t *cbuf, int cindex, int clen, + int cbuf_type, void *opaque); + +int lre_parse_escape(const uint8_t **pp, int allow_utf16); +LRE_BOOL lre_is_space(int c); + +/* must be provided by the user */ +LRE_BOOL lre_check_stack_overflow(void *opaque, size_t alloca_size); +void *lre_realloc(void *opaque, void *ptr, size_t size); + +/* JS identifier test */ +extern uint32_t const lre_id_start_table_ascii[4]; +extern uint32_t const lre_id_continue_table_ascii[4]; + +static inline int lre_js_is_ident_first(int c) +{ + if ((uint32_t)c < 128) { + return (lre_id_start_table_ascii[c >> 5] >> (c & 31)) & 1; + } else { +#ifdef CONFIG_ALL_UNICODE + return lre_is_id_start(c); +#else + return !lre_is_space(c); +#endif + } +} + +static inline int lre_js_is_ident_next(int c) +{ + if ((uint32_t)c < 128) { + return (lre_id_continue_table_ascii[c >> 5] >> (c & 31)) & 1; + } else { + /* ZWNJ and ZWJ are accepted in identifiers */ +#ifdef CONFIG_ALL_UNICODE + return lre_is_id_continue(c) || c == 0x200C || c == 0x200D; +#else + return !lre_is_space(c) || c == 0x200C || c == 0x200D; +#endif + } +} + +#undef LRE_BOOL + +#endif /* LIBREGEXP_H */ diff --git a/src/bindings/libregexp.nim b/src/bindings/libregexp.nim new file mode 100644 index 00000000..1b84400e --- /dev/null +++ b/src/bindings/libregexp.nim @@ -0,0 +1,19 @@ +const lreheader = "bindings/libregexp.h" + +const + LRE_FLAG_GLOBAL* = 1 shl 0 + LRE_FLAG_IGNORECASE* = 1 shl 1 + LRE_FLAG_MULTILINE* = 1 shl 2 + LRE_FLAG_DOTALL* = 1 shl 3 + LRE_FLAG_UTF16* = 1 shl 4 + LRE_FLAG_STICKY* = 1 shl 5 + +proc lre_compile*(plen: ptr cint, error_msg: cstring, error_msg_size: cint, + buf: cstring, buf_len: csize_t, re_flags: cint, + opaque: pointer): ptr uint8 {.importc: "lre_compile", header: lreheader.} + +proc lre_exec*(capture: ptr ptr uint8, bc_buf: ptr uint8, cbuf: ptr uint8, + cindex: cint, clen: cint, cbuf_type: cint, + opaque: pointer): cint {.importc: "lre_exec", header: lreheader.} + +proc lre_get_capture_count*(bc_buf: ptr uint8): cint {.importc: "lre_get_capture_count", header: lreheader.} diff --git a/src/bindings/libunicode.h b/src/bindings/libunicode.h new file mode 100644 index 00000000..cfa600a5 --- /dev/null +++ b/src/bindings/libunicode.h @@ -0,0 +1,124 @@ +/* + * Unicode utilities + * + * Copyright (c) 2017-2018 Fabrice Bellard + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ +#ifndef LIBUNICODE_H +#define LIBUNICODE_H + +#include <inttypes.h> + +#define LRE_BOOL int /* for documentation purposes */ + +/* define it to include all the unicode tables (40KB larger) */ +#define CONFIG_ALL_UNICODE + +#define LRE_CC_RES_LEN_MAX 3 + +typedef enum { + UNICODE_NFC, + UNICODE_NFD, + UNICODE_NFKC, + UNICODE_NFKD, +} UnicodeNormalizationEnum; + +int lre_case_conv(uint32_t *res, uint32_t c, int conv_type); +LRE_BOOL lre_is_cased(uint32_t c); +LRE_BOOL lre_is_case_ignorable(uint32_t c); + +/* char ranges */ + +typedef struct { + int len; /* in points, always even */ + int size; + uint32_t *points; /* points sorted by increasing value */ + void *mem_opaque; + void *(*realloc_func)(void *opaque, void *ptr, size_t size); +} CharRange; + +typedef enum { + CR_OP_UNION, + CR_OP_INTER, + CR_OP_XOR, +} CharRangeOpEnum; + +void cr_init(CharRange *cr, void *mem_opaque, void *(*realloc_func)(void *opaque, void *ptr, size_t size)); +void cr_free(CharRange *cr); +int cr_realloc(CharRange *cr, int size); +int cr_copy(CharRange *cr, const CharRange *cr1); + +static inline int cr_add_point(CharRange *cr, uint32_t v) +{ + if (cr->len >= cr->size) { + if (cr_realloc(cr, cr->len + 1)) + return -1; + } + cr->points[cr->len++] = v; + return 0; +} + +static inline int cr_add_interval(CharRange *cr, uint32_t c1, uint32_t c2) +{ + if ((cr->len + 2) > cr->size) { + if (cr_realloc(cr, cr->len + 2)) + return -1; + } + cr->points[cr->len++] = c1; + cr->points[cr->len++] = c2; + return 0; +} + +int cr_union1(CharRange *cr, const uint32_t *b_pt, int b_len); + +static inline int cr_union_interval(CharRange *cr, uint32_t c1, uint32_t c2) +{ + uint32_t b_pt[2]; + b_pt[0] = c1; + b_pt[1] = c2 + 1; + return cr_union1(cr, b_pt, 2); +} + +int cr_op(CharRange *cr, const uint32_t *a_pt, int a_len, + const uint32_t *b_pt, int b_len, int op); + +int cr_invert(CharRange *cr); + +#ifdef CONFIG_ALL_UNICODE + +LRE_BOOL lre_is_id_start(uint32_t c); +LRE_BOOL lre_is_id_continue(uint32_t c); + +int unicode_normalize(uint32_t **pdst, const uint32_t *src, int src_len, + UnicodeNormalizationEnum n_type, + void *opaque, void *(*realloc_func)(void *opaque, void *ptr, size_t size)); + +/* Unicode character range functions */ + +int unicode_script(CharRange *cr, + const char *script_name, LRE_BOOL is_ext); +int unicode_general_category(CharRange *cr, const char *gc_name); +int unicode_prop(CharRange *cr, const char *prop_name); + +#endif /* CONFIG_ALL_UNICODE */ + +#undef LRE_BOOL + +#endif /* LIBUNICODE_H */ diff --git a/src/bindings/quickjs.nim b/src/bindings/quickjs.nim index 76cd74cd..73cba0d2 100644 --- a/src/bindings/quickjs.nim +++ b/src/bindings/quickjs.nim @@ -96,6 +96,7 @@ proc JS_NewRuntime*(): JSRuntime {.importc: "JS_NewRuntime", header: qjsheader.} proc JS_FreeRuntime*(rt: JSRuntime) {.importc: "JS_FreeRuntime", header: qjsheader.} proc JS_NewContext*(rt: JSRuntime): JSContext {.importc: "JS_NewContext", header: qjsheader.} +proc JS_NewContextRaw*(rt: JSRuntime): JSContext {.importc: "JS_NewContextRaw", header: qjsheader.} proc JS_FreeContext*(ctx: JSContext) {.importc: "JS_FreeContext", header: qjsheader.} proc JS_GetGlobalObject*(ctx: JSContext): JSValue {.importc: "JS_GetGlobalObject", header: qjsheader.} @@ -127,3 +128,5 @@ proc JS_GetException*(ctx: JSContext): JSValue {.importc: "JS_GetException", hea proc JS_SetContextOpaque*(ctx: JSContext, opaque: pointer) {.importc: "JS_SetContextOpaque", header: qjsheader.} proc JS_GetContextOpaque*(ctx: JSContext): pointer {.importc: "JS_GetContextOpaque", header: qjsheader.} + +proc js_free_rt*(rt: JSRuntime, p: pointer) {.importc: "js_free_rt".} |