diff options
author | Charlie Gordon <github@chqrlie.org> | 2024-02-20 00:22:32 +0100 |
---|---|---|
committer | bptato <nincsnevem662@gmail.com> | 2024-03-02 18:12:24 +0100 |
commit | d4885cdedbe2d2bb366a178e54622893898a4793 (patch) | |
tree | bae983ae428d3fd3b927d98927d6a616d39cfd4f /lib/quickjs/quickjs.c | |
parent | 566a850964945d12e95fa6290f63783977038f53 (diff) | |
download | chawan-d4885cdedbe2d2bb366a178e54622893898a4793.tar.gz |
Improve surrogate handling readability
- add inline function to test and convert surrogates is_surrogate(c), is_hi_surrogate(c), is_lo_surrogate(c), get_hi_surrogate(c), get_lo_surrogate(c), from_surrogate(hi, lo) - use names for BC header offsets and lengths in libregexp.c - remove strict aliasing violations in `lre_exec_backtrack()` - pass all context variables to XXX_CHAR macros in `lre_exec_backtrack()`
Diffstat (limited to 'lib/quickjs/quickjs.c')
-rw-r--r-- | lib/quickjs/quickjs.c | 76 |
1 files changed, 34 insertions, 42 deletions
diff --git a/lib/quickjs/quickjs.c b/lib/quickjs/quickjs.c index 63024416..4e5f9fb7 100644 --- a/lib/quickjs/quickjs.c +++ b/lib/quickjs/quickjs.c @@ -3690,10 +3690,9 @@ static int string_buffer_putc(StringBuffer *s, uint32_t c) { if (unlikely(c >= 0x10000)) { /* surrogate pair */ - c -= 0x10000; - if (string_buffer_putc16(s, (c >> 10) + 0xd800)) + if (string_buffer_putc16(s, get_hi_surrogate(c))) return -1; - c = (c & 0x3ff) + 0xdc00; + c = get_lo_surrogate(c); } return string_buffer_putc16(s, c); } @@ -3704,10 +3703,10 @@ static int string_getc(const JSString *p, int *pidx) idx = *pidx; if (p->is_wide_char) { c = p->u.str16[idx++]; - if (c >= 0xd800 && c < 0xdc00 && idx < p->len) { + if (is_hi_surrogate(c) && idx < p->len) { c1 = p->u.str16[idx]; - if (c1 >= 0xdc00 && c1 < 0xe000) { - c = (((c & 0x3ff) << 10) | (c1 & 0x3ff)) + 0x10000; + if (is_lo_surrogate(c1)) { + c = from_surrogate(c, c1); idx++; } } @@ -3905,9 +3904,8 @@ JSValue JS_NewStringLen(JSContext *ctx, const char *buf, size_t buf_len) } else if (c <= 0x10FFFF) { p = p_next; /* surrogate pair */ - c -= 0x10000; - string_buffer_putc16(b, (c >> 10) + 0xd800); - c = (c & 0x3ff) + 0xdc00; + string_buffer_putc16(b, get_hi_surrogate(c)); + c = get_lo_surrogate(c); } else { /* invalid char */ c = 0xfffd; @@ -4045,13 +4043,12 @@ const char *JS_ToCStringLen2(JSContext *ctx, size_t *plen, JSValueConst val1, BO if (c < 0x80) { *q++ = c; } else { - if (c >= 0xd800 && c < 0xdc00) { + if (is_hi_surrogate(c)) { if (pos < len && !cesu8) { c1 = src[pos]; - if (c1 >= 0xdc00 && c1 < 0xe000) { + if (is_lo_surrogate(c1)) { pos++; - /* surrogate pair */ - c = (((c & 0x3ff) << 10) | (c1 & 0x3ff)) + 0x10000; + c = from_surrogate(c, c1); } else { /* Keep unmatched surrogate code points */ /* c = 0xfffd; */ /* error */ @@ -11831,7 +11828,7 @@ static JSValue JS_ToQuotedString(JSContext *ctx, JSValueConst val1) goto fail; break; default: - if (c < 32 || (c >= 0xd800 && c < 0xe000)) { + if (c < 32 || is_surrogate(c)) { snprintf(buf, sizeof(buf), "\\u%04x", c); if (string_buffer_puts8(b, buf)) goto fail; @@ -41691,18 +41688,18 @@ static int64_t string_advance_index(JSString *p, int64_t index, BOOL unicode) -1 if none */ static int js_string_find_invalid_codepoint(JSString *p) { - int i, c; + int i; if (!p->is_wide_char) return -1; for(i = 0; i < p->len; i++) { - c = p->u.str16[i]; - if (c >= 0xD800 && c <= 0xDFFF) { - if (c >= 0xDC00 || (i + 1) >= p->len) - return i; - c = p->u.str16[i + 1]; - if (c < 0xDC00 || c > 0xDFFF) + uint32_t c = p->u.str16[i]; + if (is_surrogate(c)) { + if (is_hi_surrogate(c) && (i + 1) < p->len + && is_lo_surrogate(p->u.str16[i + 1])) { + i++; + } else { return i; - i++; + } } } return -1; @@ -41729,7 +41726,7 @@ static JSValue js_string_toWellFormed(JSContext *ctx, JSValueConst this_val, { JSValue str, ret; JSString *p; - int c, i; + int i; str = JS_ToStringCheckObject(ctx, this_val); if (JS_IsException(str)) @@ -41748,17 +41745,13 @@ static JSValue js_string_toWellFormed(JSContext *ctx, JSValueConst this_val, p = JS_VALUE_GET_STRING(ret); for (; i < p->len; i++) { - c = p->u.str16[i]; - if (c >= 0xD800 && c <= 0xDFFF) { - if (c >= 0xDC00 || (i + 1) >= p->len) { - p->u.str16[i] = 0xFFFD; + uint32_t c = p->u.str16[i]; + if (is_surrogate(c)) { + if (is_hi_surrogate(c) && (i + 1) < p->len + && is_lo_surrogate(p->u.str16[i + 1])) { + i++; } else { - c = p->u.str16[i + 1]; - if (c < 0xDC00 || c > 0xDFFF) { - p->u.str16[i] = 0xFFFD; - } else { - i++; - } + p->u.str16[i] = 0xFFFD; } } } @@ -42535,10 +42528,10 @@ static int string_prevc(JSString *p, int *pidx) idx--; if (p->is_wide_char) { c = p->u.str16[idx]; - if (c >= 0xdc00 && c < 0xe000 && idx > 0) { + if (is_lo_surrogate(c) && idx > 0) { c1 = p->u.str16[idx - 1]; - if (c1 >= 0xd800 && c1 <= 0xdc00) { - c = (((c1 & 0x3ff) << 10) | (c & 0x3ff)) + 0x10000; + if (is_hi_surrogate(c1)) { + c = from_surrogate(c1, c); idx--; } } @@ -49222,8 +49215,7 @@ static JSValue js_global_decodeURI(JSContext *ctx, JSValueConst this_val, } c = (c << 6) | (c1 & 0x3f); } - if (c < c_min || c > 0x10FFFF || - (c >= 0xd800 && c < 0xe000)) { + if (c < c_min || c > 0x10FFFF || is_surrogate(c)) { js_throw_URIError(ctx, "malformed UTF-8"); goto fail; } @@ -49298,21 +49290,21 @@ static JSValue js_global_encodeURI(JSContext *ctx, JSValueConst this_val, if (isURIUnescaped(c, isComponent)) { string_buffer_putc16(b, c); } else { - if (c >= 0xdc00 && c <= 0xdfff) { + if (is_lo_surrogate(c)) { js_throw_URIError(ctx, "invalid character"); goto fail; - } else if (c >= 0xd800 && c <= 0xdbff) { + } else if (is_hi_surrogate(c)) { if (k >= p->len) { js_throw_URIError(ctx, "expecting surrogate pair"); goto fail; } c1 = string_get(p, k); k++; - if (c1 < 0xdc00 || c1 > 0xdfff) { + if (!is_lo_surrogate(c1)) { js_throw_URIError(ctx, "expecting surrogate pair"); goto fail; } - c = (((c & 0x3ff) << 10) | (c1 & 0x3ff)) + 0x10000; + c = from_surrogate(c, c1); } if (c < 0x80) { encodeURI_hex(b, c); |