about summary refs log tree commit diff stats
path: root/src/bindings
diff options
context:
space:
mode:
author bptato <nincsnevem662@gmail.com> 2022-07-22 19:52:31 +0200
committer bptato <nincsnevem662@gmail.com> 2022-07-22 19:52:31 +0200
commit 6f7bcc54ab03bc31be309352c73fd8d8153f9c91 (patch)
tree dc4fd8a80ccc8a5a8f7c5a567fcbf80c3e66eccb /src/bindings
parent c69a8ab7576e2053afc5dfcee5c7152a07c31230 (diff)
download chawan-6f7bcc54ab03bc31be309352c73fd8d8153f9c91.tar.gz
Add search function
Uses libregexp from QuickJS. Incremental search is quite hacky for now,
but overall it seems to work OK.
Diffstat (limited to 'src/bindings')
-rw-r--r-- src/bindings/libregexp.h 92
-rw-r--r-- src/bindings/libregexp.nim 19
-rw-r--r-- src/bindings/libunicode.h 124
-rw-r--r-- src/bindings/quickjs.nim 3
4 files changed, 238 insertions, 0 deletions
diff --git a/src/bindings/libregexp.h b/src/bindings/libregexp.h
new file mode 100644
index 00000000..9aedb7e9
--- /dev/null
+++ b/src/bindings/libregexp.h
@@ -0,0 +1,92 @@
+/*
+ * Regular Expression Engine
+ * 
+ * Copyright (c) 2017-2018 Fabrice Bellard
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+#ifndef LIBREGEXP_H
+#define LIBREGEXP_H
+
+#include <stddef.h>
+
+#include "libunicode.h"
+
+#define LRE_BOOL  int       /* for documentation purposes */
+
+#define LRE_FLAG_GLOBAL     (1 << 0)
+#define LRE_FLAG_IGNORECASE (1 << 1)
+#define LRE_FLAG_MULTILINE  (1 << 2)
+#define LRE_FLAG_DOTALL     (1 << 3)
+#define LRE_FLAG_UTF16      (1 << 4)
+#define LRE_FLAG_STICKY     (1 << 5)
+
+#define LRE_FLAG_NAMED_GROUPS (1 << 7) /* named groups are present in the regexp */
+
+uint8_t *lre_compile(int *plen, char *error_msg, int error_msg_size,
+                     const char *buf, size_t buf_len, int re_flags,
+                     void *opaque);
+int lre_get_capture_count(const uint8_t *bc_buf);
+int lre_get_flags(const uint8_t *bc_buf);
+const char *lre_get_groupnames(const uint8_t *bc_buf);
+int lre_exec(uint8_t **capture,
+             const uint8_t *bc_buf, const uint8_t *cbuf, int cindex, int clen,
+             int cbuf_type, void *opaque);
+
+int lre_parse_escape(const uint8_t **pp, int allow_utf16);
+LRE_BOOL lre_is_space(int c);
+
+/* must be provided by the user */
+LRE_BOOL lre_check_stack_overflow(void *opaque, size_t alloca_size); 
+void *lre_realloc(void *opaque, void *ptr, size_t size);
+
+/* JS identifier test */
+extern uint32_t const lre_id_start_table_ascii[4];
+extern uint32_t const lre_id_continue_table_ascii[4];
+
+static inline int lre_js_is_ident_first(int c)
+{
+    /* ASCII: test bit (c & 31) of table word (c >> 5) */
+    if ((uint32_t)c < 128)
+        return (lre_id_start_table_ascii[c >> 5] >> (c & 31)) & 1;
+    /* everything else, including negative c (huge after the cast) */
+#ifdef CONFIG_ALL_UNICODE
+    return lre_is_id_start(c);
+#else
+    return !lre_is_space(c);
+#endif
+}
+
+static inline int lre_js_is_ident_next(int c)
+{
+    int ok;
+    if ((uint32_t)c < 128)
+        return (lre_id_continue_table_ascii[c >> 5] >> (c & 31)) & 1;
+#ifdef CONFIG_ALL_UNICODE
+    ok = lre_is_id_continue(c);
+#else
+    ok = !lre_is_space(c);
+#endif
+    /* ZWNJ and ZWJ are accepted in identifiers */
+    return ok || c == 0x200C || c == 0x200D;
+}
+
+#undef LRE_BOOL
+
+#endif /* LIBREGEXP_H */
diff --git a/src/bindings/libregexp.nim b/src/bindings/libregexp.nim
new file mode 100644
index 00000000..1b84400e
--- /dev/null
+++ b/src/bindings/libregexp.nim
@@ -0,0 +1,19 @@
+const lreheader = "bindings/libregexp.h"
+
+const
+  LRE_FLAG_GLOBAL* = 0x01      ## 1 shl 0, same bit as in libregexp.h
+  LRE_FLAG_IGNORECASE* = 0x02  ## 1 shl 1
+  LRE_FLAG_MULTILINE* = 0x04   ## 1 shl 2
+  LRE_FLAG_DOTALL* = 0x08      ## 1 shl 3
+  LRE_FLAG_UTF16* = 0x10       ## 1 shl 4
+  LRE_FLAG_STICKY* = 0x20      ## 1 shl 5
+
+# Raw libregexp imports; each C symbol has the same spelling as the Nim name.
+proc lre_compile*(plen: ptr cint; error_msg: cstring; error_msg_size: cint;
+                  buf: cstring; buf_len: csize_t; re_flags: cint;
+                  opaque: pointer): ptr uint8 {.importc, header: lreheader.}
+
+proc lre_exec*(capture: ptr ptr uint8; bc_buf, cbuf: ptr uint8;
+               cindex, clen, cbuf_type: cint;
+               opaque: pointer): cint {.importc, header: lreheader.}
+proc lre_get_capture_count*(bc_buf: ptr uint8): cint {.importc, header: lreheader.}
diff --git a/src/bindings/libunicode.h b/src/bindings/libunicode.h
new file mode 100644
index 00000000..cfa600a5
--- /dev/null
+++ b/src/bindings/libunicode.h
@@ -0,0 +1,124 @@
+/*
+ * Unicode utilities
+ * 
+ * Copyright (c) 2017-2018 Fabrice Bellard
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+#ifndef LIBUNICODE_H
+#define LIBUNICODE_H
+
+#include <inttypes.h>
+
+#define LRE_BOOL  int       /* for documentation purposes */
+
+/* define it to include all the unicode tables (40KB larger) */
+#define CONFIG_ALL_UNICODE
+
+#define LRE_CC_RES_LEN_MAX 3
+
+typedef enum {
+    UNICODE_NFC,
+    UNICODE_NFD,
+    UNICODE_NFKC,
+    UNICODE_NFKD,
+} UnicodeNormalizationEnum;
+
+int lre_case_conv(uint32_t *res, uint32_t c, int conv_type);
+LRE_BOOL lre_is_cased(uint32_t c);
+LRE_BOOL lre_is_case_ignorable(uint32_t c);
+
+/* char ranges */
+
+typedef struct {
+    int len; /* in points, always even */
+    int size; /* compared against len before appending (see cr_add_point) */
+    uint32_t *points; /* points sorted by increasing value */
+    void *mem_opaque; /* presumably the opaque passed to realloc_func - confirm */
+    void *(*realloc_func)(void *opaque, void *ptr, size_t size);
+} CharRange;
+
+typedef enum {
+    CR_OP_UNION,
+    CR_OP_INTER,
+    CR_OP_XOR,
+} CharRangeOpEnum;
+
+void cr_init(CharRange *cr, void *mem_opaque, void *(*realloc_func)(void *opaque, void *ptr, size_t size));
+void cr_free(CharRange *cr);
+int cr_realloc(CharRange *cr, int size);
+int cr_copy(CharRange *cr, const CharRange *cr1);
+
+static inline int cr_add_point(CharRange *cr, uint32_t v)
+{
+    /* grow first when full; a non-zero cr_realloc result is reported as -1 */
+    if (cr->len >= cr->size && cr_realloc(cr, cr->len + 1))
+        return -1;
+    cr->points[cr->len] = v;
+    cr->len++;
+    return 0;
+}
+
+static inline int cr_add_interval(CharRange *cr, uint32_t c1, uint32_t c2)
+{
+    /* two slots are needed; a non-zero cr_realloc result is reported as -1 */
+    if (cr->len + 2 > cr->size && cr_realloc(cr, cr->len + 2))
+        return -1;
+    cr->points[cr->len] = c1;
+    cr->points[cr->len + 1] = c2;
+    cr->len += 2;
+    return 0;
+}
+
+int cr_union1(CharRange *cr, const uint32_t *b_pt, int b_len);
+
+static inline int cr_union_interval(CharRange *cr, uint32_t c1, uint32_t c2)
+{
+    /* hand cr_union1 the two-point list { c1, c2 + 1 } */
+    const uint32_t bounds[2] = { c1, c2 + 1 };
+
+    return cr_union1(cr, bounds, 2);
+}
+
+int cr_op(CharRange *cr, const uint32_t *a_pt, int a_len,
+          const uint32_t *b_pt, int b_len, int op);
+
+int cr_invert(CharRange *cr);
+
+#ifdef CONFIG_ALL_UNICODE
+
+LRE_BOOL lre_is_id_start(uint32_t c);
+LRE_BOOL lre_is_id_continue(uint32_t c);
+
+int unicode_normalize(uint32_t **pdst, const uint32_t *src, int src_len,
+                      UnicodeNormalizationEnum n_type,
+                      void *opaque, void *(*realloc_func)(void *opaque, void *ptr, size_t size));
+
+/* Unicode character range functions */
+
+int unicode_script(CharRange *cr,
+                   const char *script_name, LRE_BOOL is_ext);
+int unicode_general_category(CharRange *cr, const char *gc_name);
+int unicode_prop(CharRange *cr, const char *prop_name);
+
+#endif /* CONFIG_ALL_UNICODE */
+
+#undef LRE_BOOL
+
+#endif /* LIBUNICODE_H */
diff --git a/src/bindings/quickjs.nim b/src/bindings/quickjs.nim
index 76cd74cd..73cba0d2 100644
--- a/src/bindings/quickjs.nim
+++ b/src/bindings/quickjs.nim
@@ -96,6 +96,7 @@ proc JS_NewRuntime*(): JSRuntime {.importc: "JS_NewRuntime", header: qjsheader.}
 proc JS_FreeRuntime*(rt: JSRuntime) {.importc: "JS_FreeRuntime", header: qjsheader.}
 
 proc JS_NewContext*(rt: JSRuntime): JSContext {.importc: "JS_NewContext", header: qjsheader.}
+proc JS_NewContextRaw*(rt: JSRuntime): JSContext {.importc: "JS_NewContextRaw", header: qjsheader.}
 proc JS_FreeContext*(ctx: JSContext) {.importc: "JS_FreeContext", header: qjsheader.}
 
 proc JS_GetGlobalObject*(ctx: JSContext): JSValue {.importc: "JS_GetGlobalObject", header: qjsheader.}
@@ -127,3 +128,5 @@ proc JS_GetException*(ctx: JSContext): JSValue {.importc: "JS_GetException", hea
 
 proc JS_SetContextOpaque*(ctx: JSContext, opaque: pointer) {.importc: "JS_SetContextOpaque", header: qjsheader.}
 proc JS_GetContextOpaque*(ctx: JSContext): pointer {.importc: "JS_GetContextOpaque", header: qjsheader.}
+# Resolved through qjsheader like every other binding in this file.
+proc js_free_rt*(rt: JSRuntime, p: pointer) {.importc: "js_free_rt", header: qjsheader.}