about summary refs log tree commit diff stats
path: root/lib/quickjs/libunicode.h
diff options
context:
space:
mode:
Diffstat (limited to 'lib/quickjs/libunicode.h')
-rw-r--r--lib/quickjs/libunicode.h103
1 files changed, 79 insertions, 24 deletions
diff --git a/lib/quickjs/libunicode.h b/lib/quickjs/libunicode.h
index f416157b..cc2f244c 100644
--- a/lib/quickjs/libunicode.h
+++ b/lib/quickjs/libunicode.h
@@ -24,27 +24,13 @@
 #ifndef LIBUNICODE_H
 #define LIBUNICODE_H
 
-#include <inttypes.h>
-
-#define LRE_BOOL  int       /* for documentation purposes */
+#include <stdint.h>
 
 /* define it to include all the unicode tables (40KB larger) */
 #define CONFIG_ALL_UNICODE
 
 #define LRE_CC_RES_LEN_MAX 3
 
-typedef enum {
-    UNICODE_NFC,
-    UNICODE_NFD,
-    UNICODE_NFKC,
-    UNICODE_NFKD,
-} UnicodeNormalizationEnum;
-
-int lre_case_conv(uint32_t *res, uint32_t c, int conv_type);
-int lre_canonicalize(uint32_t c, LRE_BOOL is_unicode);
-LRE_BOOL lre_is_cased(uint32_t c);
-LRE_BOOL lre_is_case_ignorable(uint32_t c);
-
 /* char ranges */
 
 typedef struct {
@@ -102,12 +88,14 @@ int cr_op(CharRange *cr, const uint32_t *a_pt, int a_len,
 
 int cr_invert(CharRange *cr);
 
-int cr_regexp_canonicalize(CharRange *cr, LRE_BOOL is_unicode);
-
-#ifdef CONFIG_ALL_UNICODE
+int cr_regexp_canonicalize(CharRange *cr, int is_unicode);
 
-LRE_BOOL lre_is_id_start(uint32_t c);
-LRE_BOOL lre_is_id_continue(uint32_t c);
+typedef enum {
+    UNICODE_NFC,
+    UNICODE_NFD,
+    UNICODE_NFKC,
+    UNICODE_NFKD,
+} UnicodeNormalizationEnum;
 
 int unicode_normalize(uint32_t **pdst, const uint32_t *src, int src_len,
                       UnicodeNormalizationEnum n_type,
@@ -115,13 +103,80 @@ int unicode_normalize(uint32_t **pdst, const uint32_t *src, int src_len,
 
 /* Unicode character range functions */
 
-int unicode_script(CharRange *cr,
-                   const char *script_name, LRE_BOOL is_ext);
+int unicode_script(CharRange *cr, const char *script_name, int is_ext);
 int unicode_general_category(CharRange *cr, const char *gc_name);
 int unicode_prop(CharRange *cr, const char *prop_name);
 
-#endif /* CONFIG_ALL_UNICODE */
+int lre_case_conv(uint32_t *res, uint32_t c, int conv_type);
+int lre_canonicalize(uint32_t c, int is_unicode);
+
+/* Code point type categories */
+enum {
+    UNICODE_C_SPACE  = (1 << 0),
+    UNICODE_C_DIGIT  = (1 << 1),
+    UNICODE_C_UPPER  = (1 << 2),
+    UNICODE_C_LOWER  = (1 << 3),
+    UNICODE_C_UNDER  = (1 << 4),
+    UNICODE_C_DOLLAR = (1 << 5),
+    UNICODE_C_XDIGIT = (1 << 6),
+};
+extern uint8_t const lre_ctype_bits[256];
+
+/* zero or non-zero return value */
+int lre_is_cased(uint32_t c);
+int lre_is_case_ignorable(uint32_t c);
+int lre_is_id_start(uint32_t c);
+int lre_is_id_continue(uint32_t c);
+
+static inline int lre_is_space_byte(uint8_t c) {
+    return lre_ctype_bits[c] & UNICODE_C_SPACE;
+}
+
+static inline int lre_is_id_start_byte(uint8_t c) {
+    return lre_ctype_bits[c] & (UNICODE_C_UPPER | UNICODE_C_LOWER |
+                                UNICODE_C_UNDER | UNICODE_C_DOLLAR);
+}
 
-#undef LRE_BOOL
+static inline int lre_is_id_continue_byte(uint8_t c) {
+    return lre_ctype_bits[c] & (UNICODE_C_UPPER | UNICODE_C_LOWER |
+                                UNICODE_C_UNDER | UNICODE_C_DOLLAR |
+                                UNICODE_C_DIGIT);
+}
+
+int lre_is_space_non_ascii(uint32_t c);
+
+static inline int lre_is_space(uint32_t c) {
+    if (c < 256)
+        return lre_is_space_byte(c);
+    else
+        return lre_is_space_non_ascii(c);
+}
+
+static inline int lre_js_is_ident_first(uint32_t c) {
+    if (c < 128) {
+        return lre_is_id_start_byte(c);
+    } else {
+#ifdef CONFIG_ALL_UNICODE
+        return lre_is_id_start(c);
+#else
+        return !lre_is_space_non_ascii(c);
+#endif
+    }
+}
+
+static inline int lre_js_is_ident_next(uint32_t c) {
+    if (c < 128) {
+        return lre_is_id_continue_byte(c);
+    } else {
+        /* ZWNJ and ZWJ are accepted in identifiers */
+        if (c >= 0x200C && c <= 0x200D)
+            return TRUE;
+#ifdef CONFIG_ALL_UNICODE
+        return lre_is_id_continue(c);
+#else
+        return !lre_is_space_non_ascii(c);
+#endif
+    }
+}
 
 #endif /* LIBUNICODE_H */
s revision' href='/ahoang/chawan/blame/src/loader/dirlist.nim?id=7319240c86caaa6782afc79418a070831a11351e'>^
17969768 ^

2277b199 ^
83dae4a8 ^
2277b199 ^


83dae4a8 ^
2277b199 ^
46837a6b ^
2277b199 ^








83dae4a8 ^
7badcc1f ^
83dae4a8 ^
2277b199 ^


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60