about summary refs log tree commit diff stats
path: root/src/bindings/libunicode.h
diff options
context:
space:
mode:
authorbptato <nincsnevem662@gmail.com>2022-07-22 19:52:31 +0200
committerbptato <nincsnevem662@gmail.com>2022-07-22 19:52:31 +0200
commit6f7bcc54ab03bc31be309352c73fd8d8153f9c91 (patch)
treedc4fd8a80ccc8a5a8f7c5a567fcbf80c3e66eccb /src/bindings/libunicode.h
parentc69a8ab7576e2053afc5dfcee5c7152a07c31230 (diff)
downloadchawan-6f7bcc54ab03bc31be309352c73fd8d8153f9c91.tar.gz
Add search function
Uses libregexp from QuickJS. Incremental search is quite hacky for now,
but overall it seems to work OK.
Diffstat (limited to 'src/bindings/libunicode.h')
-rw-r--r--src/bindings/libunicode.h124
1 files changed, 124 insertions, 0 deletions
diff --git a/src/bindings/libunicode.h b/src/bindings/libunicode.h
new file mode 100644
index 00000000..cfa600a5
--- /dev/null
+++ b/src/bindings/libunicode.h
@@ -0,0 +1,124 @@
+/*
+ * Unicode utilities
+ * 
+ * Copyright (c) 2017-2018 Fabrice Bellard
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in
+ * all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
+ * THE SOFTWARE.
+ */
+#ifndef LIBUNICODE_H
+#define LIBUNICODE_H
+
+#include <inttypes.h>
+
+#define LRE_BOOL  int       /* for documentation purposes */
+
+/* define it to include all the unicode tables (40KB larger) */
+#define CONFIG_ALL_UNICODE
+
+#define LRE_CC_RES_LEN_MAX 3
+
+typedef enum {
+    UNICODE_NFC,
+    UNICODE_NFD,
+    UNICODE_NFKC,
+    UNICODE_NFKD,
+} UnicodeNormalizationEnum;
+
+int lre_case_conv(uint32_t *res, uint32_t c, int conv_type);
+LRE_BOOL lre_is_cased(uint32_t c);
+LRE_BOOL lre_is_case_ignorable(uint32_t c);
+
+/* char ranges */
+
+typedef struct {
+    int len; /* in points, always even */
+    int size;
+    uint32_t *points; /* points sorted by increasing value */
+    void *mem_opaque;
+    void *(*realloc_func)(void *opaque, void *ptr, size_t size);
+} CharRange;
+
+typedef enum {
+    CR_OP_UNION,
+    CR_OP_INTER,
+    CR_OP_XOR,
+} CharRangeOpEnum;
+
+void cr_init(CharRange *cr, void *mem_opaque, void *(*realloc_func)(void *opaque, void *ptr, size_t size));
+void cr_free(CharRange *cr);
+int cr_realloc(CharRange *cr, int size);
+int cr_copy(CharRange *cr, const CharRange *cr1);
+
+static inline int cr_add_point(CharRange *cr, uint32_t v)
+{
+    if (cr->len >= cr->size) {
+        if (cr_realloc(cr, cr->len + 1))
+            return -1;
+    }
+    cr->points[cr->len++] = v;
+    return 0;
+}
+
+static inline int cr_add_interval(CharRange *cr, uint32_t c1, uint32_t c2)
+{
+    if ((cr->len + 2) > cr->size) {
+        if (cr_realloc(cr, cr->len + 2))
+            return -1;
+    }
+    cr->points[cr->len++] = c1;
+    cr->points[cr->len++] = c2;
+    return 0;
+}
+
+int cr_union1(CharRange *cr, const uint32_t *b_pt, int b_len);
+
+static inline int cr_union_interval(CharRange *cr, uint32_t c1, uint32_t c2)
+{
+    uint32_t b_pt[2];
+    b_pt[0] = c1;
+    b_pt[1] = c2 + 1;
+    return cr_union1(cr, b_pt, 2);
+}
+
+int cr_op(CharRange *cr, const uint32_t *a_pt, int a_len,
+          const uint32_t *b_pt, int b_len, int op);
+
+int cr_invert(CharRange *cr);
+
+#ifdef CONFIG_ALL_UNICODE
+
+LRE_BOOL lre_is_id_start(uint32_t c);
+LRE_BOOL lre_is_id_continue(uint32_t c);
+
+int unicode_normalize(uint32_t **pdst, const uint32_t *src, int src_len,
+                      UnicodeNormalizationEnum n_type,
+                      void *opaque, void *(*realloc_func)(void *opaque, void *ptr, size_t size));
+
+/* Unicode character range functions */
+
+int unicode_script(CharRange *cr,
+                   const char *script_name, LRE_BOOL is_ext);
+int unicode_general_category(CharRange *cr, const char *gc_name);
+int unicode_prop(CharRange *cr, const char *prop_name);
+
+#endif /* CONFIG_ALL_UNICODE */
+
+#undef LRE_BOOL
+
+#endif /* LIBUNICODE_H */