diff options
author | Fabrice Bellard <fabrice@bellard.org> | 2023-12-09 12:28:13 +0100 |
---|---|---|
committer | bptato <nincsnevem662@gmail.com> | 2023-12-09 22:26:51 +0100 |
commit | f5ecf8c6b6756f37bfbfad4cedf91c53c947e707 (patch) | |
tree | d236df210ea7fd732cedd97a928e63b43f4521f4 | |
parent | 617c29296ea9805fb715750333d5d0dcaa19878b (diff) | |
download | chawan-f5ecf8c6b6756f37bfbfad4cedf91c53c947e707.tar.gz |
Retrieve RegExp 'g' flag in spec conformant way (original patch by bnoordhuis)
-rw-r--r-- | lib/quickjs/libregexp.c | 23 | ||||
-rw-r--r-- | lib/quickjs/quickjs.c | 35 |
2 files changed, 41 insertions, 17 deletions
diff --git a/lib/quickjs/libregexp.c b/lib/quickjs/libregexp.c index 0dfd873a..05df8653 100644 --- a/lib/quickjs/libregexp.c +++ b/lib/quickjs/libregexp.c @@ -1071,11 +1071,10 @@ static int re_is_simple_quantifier(const uint8_t *bc_buf, int bc_buf_len) } /* '*pp' is the first char after '<' */ -static int re_parse_group_name(char *buf, int buf_size, - const uint8_t **pp, BOOL is_utf16) +static int re_parse_group_name(char *buf, int buf_size, const uint8_t **pp) { - const uint8_t *p; - uint32_t c; + const uint8_t *p, *p1; + uint32_t c, d; char *q; p = *pp; @@ -1086,11 +1085,18 @@ static int re_parse_group_name(char *buf, int buf_size, p++; if (*p != 'u') return -1; - c = lre_parse_escape(&p, is_utf16 * 2); + c = lre_parse_escape(&p, 2); // accept surrogate pairs } else if (c == '>') { break; } else if (c >= 128) { c = unicode_from_utf8(p, UTF8_CHAR_LEN_MAX, &p); + if (c >= 0xD800 && c <= 0xDBFF) { + d = unicode_from_utf8(p, UTF8_CHAR_LEN_MAX, &p1); + if (d >= 0xDC00 && d <= 0xDFFF) { + c = 0x10000 + 0x400 * (c - 0xD800) + (d - 0xDC00); + p = p1; + } + } } else { p++; } @@ -1140,8 +1146,7 @@ static int re_parse_captures(REParseState *s, int *phas_named_captures, /* potential named capture */ if (capture_name) { p += 3; - if (re_parse_group_name(name, sizeof(name), &p, - s->is_utf16) == 0) { + if (re_parse_group_name(name, sizeof(name), &p) == 0) { if (!strcmp(name, capture_name)) return capture_index; } @@ -1314,7 +1319,7 @@ static int re_parse_term(REParseState *s, BOOL is_backward_dir) } else if (p[2] == '<') { p += 3; if (re_parse_group_name(s->u.tmp_buf, sizeof(s->u.tmp_buf), - &p, s->is_utf16)) { + &p)) { return re_parse_error(s, "invalid group name"); } if (find_group_name(s, s->u.tmp_buf) > 0) { @@ -1378,7 +1383,7 @@ static int re_parse_term(REParseState *s, BOOL is_backward_dir) } p1 += 3; if (re_parse_group_name(s->u.tmp_buf, sizeof(s->u.tmp_buf), - &p1, s->is_utf16)) { + &p1)) { if (s->is_utf16 || re_has_named_captures(s)) return re_parse_error(s, "invalid group name"); else diff --git a/lib/quickjs/quickjs.c b/lib/quickjs/quickjs.c index 3aeab77d..ab69df1b 100644 --- a/lib/quickjs/quickjs.c +++ b/lib/quickjs/quickjs.c @@ -42765,7 +42765,7 @@ static JSValue js_regexp_Symbol_match(JSContext *ctx, JSValueConst this_val, { // [Symbol.match](str) JSValueConst rx = this_val; - JSValue A, S, result, matchStr; + JSValue A, S, flags, result, matchStr; int global, n, fullUnicode, isEmpty; JSString *p; @@ -42773,16 +42773,23 @@ static JSValue js_regexp_Symbol_match(JSContext *ctx, JSValueConst this_val, return JS_ThrowTypeErrorNotAnObject(ctx); A = JS_UNDEFINED; + flags = JS_UNDEFINED; result = JS_UNDEFINED; matchStr = JS_UNDEFINED; S = JS_ToString(ctx, argv[0]); if (JS_IsException(S)) goto exception; - global = JS_ToBoolFree(ctx, JS_GetProperty(ctx, rx, JS_ATOM_global)); - if (global < 0) + flags = JS_GetProperty(ctx, rx, JS_ATOM_flags); + if (JS_IsException(flags)) + goto exception; + flags = JS_ToStringFree(ctx, flags); + if (JS_IsException(flags)) goto exception; + p = JS_VALUE_GET_STRING(flags); + // TODO(bnoordhuis) query 'u' flag the same way? + global = (-1 != string_indexof_char(p, 'g', 0)); if (!global) { A = JS_RegExpExec(ctx, rx, S); } else { @@ -42826,12 +42833,14 @@ static JSValue js_regexp_Symbol_match(JSContext *ctx, JSValueConst this_val, } } JS_FreeValue(ctx, result); + JS_FreeValue(ctx, flags); JS_FreeValue(ctx, S); return A; exception: JS_FreeValue(ctx, A); JS_FreeValue(ctx, result); + JS_FreeValue(ctx, flags); JS_FreeValue(ctx, S); return JS_EXCEPTION; } @@ -43074,8 +43083,8 @@ static JSValue js_regexp_Symbol_replace(JSContext *ctx, JSValueConst this_val, // [Symbol.replace](str, rep) JSValueConst rx = this_val, rep = argv[1]; JSValueConst args[6]; - JSValue str, rep_val, matched, tab, rep_str, namedCaptures, res; - JSString *sp, *rp; + JSValue flags, str, rep_val, matched, tab, rep_str, namedCaptures, res; + JSString *p, *sp, *rp; StringBuffer b_s, *b = &b_s; ValueBuffer v_b, *results = &v_b; int nextSourcePosition, n, j, functionalReplace, is_global, fullUnicode; @@ -43091,6 +43100,7 @@ static JSValue js_regexp_Symbol_replace(JSContext *ctx, JSValueConst this_val, rep_val = JS_UNDEFINED; matched = JS_UNDEFINED; tab = JS_UNDEFINED; + flags = JS_UNDEFINED; rep_str = JS_UNDEFINED; namedCaptures = JS_UNDEFINED; @@ -43107,10 +43117,18 @@ static JSValue js_regexp_Symbol_replace(JSContext *ctx, JSValueConst this_val, goto exception; rp = JS_VALUE_GET_STRING(rep_val); } - fullUnicode = 0; - is_global = JS_ToBoolFree(ctx, JS_GetProperty(ctx, rx, JS_ATOM_global)); - if (is_global < 0) + + flags = JS_GetProperty(ctx, rx, JS_ATOM_flags); + if (JS_IsException(flags)) + goto exception; + flags = JS_ToStringFree(ctx, flags); + if (JS_IsException(flags)) goto exception; + p = JS_VALUE_GET_STRING(flags); + + // TODO(bnoordhuis) query 'u' flag the same way? + fullUnicode = 0; + is_global = (-1 != string_indexof_char(p, 'g', 0)); if (is_global) { fullUnicode = JS_ToBoolFree(ctx, JS_GetProperty(ctx, rx, JS_ATOM_unicode)); if (fullUnicode < 0) @@ -43244,6 +43262,7 @@ done1: value_buffer_free(results); JS_FreeValue(ctx, rep_val); JS_FreeValue(ctx, matched); + JS_FreeValue(ctx, flags); JS_FreeValue(ctx, tab); JS_FreeValue(ctx, rep_str); JS_FreeValue(ctx, namedCaptures); |