about summary refs log tree commit diff stats
diff options
context:
space:
mode:
authorFabrice Bellard <fabrice@bellard.org>2023-12-09 12:28:13 +0100
committerbptato <nincsnevem662@gmail.com>2023-12-09 22:26:51 +0100
commitf5ecf8c6b6756f37bfbfad4cedf91c53c947e707 (patch)
treed236df210ea7fd732cedd97a928e63b43f4521f4
parent617c29296ea9805fb715750333d5d0dcaa19878b (diff)
downloadchawan-f5ecf8c6b6756f37bfbfad4cedf91c53c947e707.tar.gz
Retrieve RegExp 'g' flag in spec conformant way (original patch by bnoordhuis)
-rw-r--r--lib/quickjs/libregexp.c23
-rw-r--r--lib/quickjs/quickjs.c35
2 files changed, 41 insertions, 17 deletions
diff --git a/lib/quickjs/libregexp.c b/lib/quickjs/libregexp.c
index 0dfd873a..05df8653 100644
--- a/lib/quickjs/libregexp.c
+++ b/lib/quickjs/libregexp.c
@@ -1071,11 +1071,10 @@ static int re_is_simple_quantifier(const uint8_t *bc_buf, int bc_buf_len)
 }
 
 /* '*pp' is the first char after '<' */
-static int re_parse_group_name(char *buf, int buf_size,
-                               const uint8_t **pp, BOOL is_utf16)
+static int re_parse_group_name(char *buf, int buf_size, const uint8_t **pp)
 {
-    const uint8_t *p;
-    uint32_t c;
+    const uint8_t *p, *p1;
+    uint32_t c, d;
     char *q;
 
     p = *pp;
@@ -1086,11 +1085,18 @@ static int re_parse_group_name(char *buf, int buf_size,
             p++;
             if (*p != 'u')
                 return -1;
-            c = lre_parse_escape(&p, is_utf16 * 2);
+            c = lre_parse_escape(&p, 2); // accept surrogate pairs
         } else if (c == '>') {
             break;
         } else if (c >= 128) {
             c = unicode_from_utf8(p, UTF8_CHAR_LEN_MAX, &p);
+            if (c >= 0xD800 && c <= 0xDBFF) {
+                d = unicode_from_utf8(p, UTF8_CHAR_LEN_MAX, &p1);
+                if (d >= 0xDC00 && d <= 0xDFFF) {
+                    c = 0x10000 + 0x400 * (c - 0xD800) + (d - 0xDC00);
+                    p = p1;
+                }
+            }
         } else {
             p++;
         }
@@ -1140,8 +1146,7 @@ static int re_parse_captures(REParseState *s, int *phas_named_captures,
                     /* potential named capture */
                     if (capture_name) {
                         p += 3;
-                        if (re_parse_group_name(name, sizeof(name), &p,
-                                                s->is_utf16) == 0) {
+                        if (re_parse_group_name(name, sizeof(name), &p) == 0) {
                             if (!strcmp(name, capture_name))
                                 return capture_index;
                         }
@@ -1314,7 +1319,7 @@ static int re_parse_term(REParseState *s, BOOL is_backward_dir)
             } else if (p[2] == '<') {
                 p += 3;
                 if (re_parse_group_name(s->u.tmp_buf, sizeof(s->u.tmp_buf),
-                                        &p, s->is_utf16)) {
+                                        &p)) {
                     return re_parse_error(s, "invalid group name");
                 }
                 if (find_group_name(s, s->u.tmp_buf) > 0) {
@@ -1378,7 +1383,7 @@ static int re_parse_term(REParseState *s, BOOL is_backward_dir)
                 }
                 p1 += 3;
                 if (re_parse_group_name(s->u.tmp_buf, sizeof(s->u.tmp_buf),
-                                        &p1, s->is_utf16)) {
+                                        &p1)) {
                     if (s->is_utf16 || re_has_named_captures(s))
                         return re_parse_error(s, "invalid group name");
                     else
diff --git a/lib/quickjs/quickjs.c b/lib/quickjs/quickjs.c
index 3aeab77d..ab69df1b 100644
--- a/lib/quickjs/quickjs.c
+++ b/lib/quickjs/quickjs.c
@@ -42765,7 +42765,7 @@ static JSValue js_regexp_Symbol_match(JSContext *ctx, JSValueConst this_val,
 {
     // [Symbol.match](str)
     JSValueConst rx = this_val;
-    JSValue A, S, result, matchStr;
+    JSValue A, S, flags, result, matchStr;
     int global, n, fullUnicode, isEmpty;
     JSString *p;
 
@@ -42773,16 +42773,23 @@ static JSValue js_regexp_Symbol_match(JSContext *ctx, JSValueConst this_val,
         return JS_ThrowTypeErrorNotAnObject(ctx);
 
     A = JS_UNDEFINED;
+    flags = JS_UNDEFINED;
     result = JS_UNDEFINED;
     matchStr = JS_UNDEFINED;
     S = JS_ToString(ctx, argv[0]);
     if (JS_IsException(S))
         goto exception;
 
-    global = JS_ToBoolFree(ctx, JS_GetProperty(ctx, rx, JS_ATOM_global));
-    if (global < 0)
+    flags = JS_GetProperty(ctx, rx, JS_ATOM_flags);
+    if (JS_IsException(flags))
+        goto exception;
+    flags = JS_ToStringFree(ctx, flags);
+    if (JS_IsException(flags))
         goto exception;
+    p = JS_VALUE_GET_STRING(flags);
 
+    // TODO(bnoordhuis) query 'u' flag the same way?
+    global = (-1 != string_indexof_char(p, 'g', 0));
     if (!global) {
         A = JS_RegExpExec(ctx, rx, S);
     } else {
@@ -42826,12 +42833,14 @@ static JSValue js_regexp_Symbol_match(JSContext *ctx, JSValueConst this_val,
         }
     }
     JS_FreeValue(ctx, result);
+    JS_FreeValue(ctx, flags);
     JS_FreeValue(ctx, S);
     return A;
 
 exception:
     JS_FreeValue(ctx, A);
     JS_FreeValue(ctx, result);
+    JS_FreeValue(ctx, flags);
     JS_FreeValue(ctx, S);
     return JS_EXCEPTION;
 }
@@ -43074,8 +43083,8 @@ static JSValue js_regexp_Symbol_replace(JSContext *ctx, JSValueConst this_val,
     // [Symbol.replace](str, rep)
     JSValueConst rx = this_val, rep = argv[1];
     JSValueConst args[6];
-    JSValue str, rep_val, matched, tab, rep_str, namedCaptures, res;
-    JSString *sp, *rp;
+    JSValue flags, str, rep_val, matched, tab, rep_str, namedCaptures, res;
+    JSString *p, *sp, *rp;
     StringBuffer b_s, *b = &b_s;
     ValueBuffer v_b, *results = &v_b;
     int nextSourcePosition, n, j, functionalReplace, is_global, fullUnicode;
@@ -43091,6 +43100,7 @@ static JSValue js_regexp_Symbol_replace(JSContext *ctx, JSValueConst this_val,
     rep_val = JS_UNDEFINED;
     matched = JS_UNDEFINED;
     tab = JS_UNDEFINED;
+    flags = JS_UNDEFINED;
     rep_str = JS_UNDEFINED;
     namedCaptures = JS_UNDEFINED;
 
@@ -43107,10 +43117,18 @@ static JSValue js_regexp_Symbol_replace(JSContext *ctx, JSValueConst this_val,
             goto exception;
         rp = JS_VALUE_GET_STRING(rep_val);
     }
-    fullUnicode = 0;
-    is_global = JS_ToBoolFree(ctx, JS_GetProperty(ctx, rx, JS_ATOM_global));
-    if (is_global < 0)
+
+    flags = JS_GetProperty(ctx, rx, JS_ATOM_flags);
+    if (JS_IsException(flags))
+        goto exception;
+    flags = JS_ToStringFree(ctx, flags);
+    if (JS_IsException(flags))
         goto exception;
+    p = JS_VALUE_GET_STRING(flags);
+
+    // TODO(bnoordhuis) query 'u' flag the same way?
+    fullUnicode = 0;
+    is_global = (-1 != string_indexof_char(p, 'g', 0));
     if (is_global) {
         fullUnicode = JS_ToBoolFree(ctx, JS_GetProperty(ctx, rx, JS_ATOM_unicode));
         if (fullUnicode < 0)
@@ -43244,6 +43262,7 @@ done1:
     value_buffer_free(results);
     JS_FreeValue(ctx, rep_val);
     JS_FreeValue(ctx, matched);
+    JS_FreeValue(ctx, flags);
     JS_FreeValue(ctx, tab);
     JS_FreeValue(ctx, rep_str);
     JS_FreeValue(ctx, namedCaptures);