diff options
author | Charlie Gordon <github@chqrlie.org> | 2024-02-25 23:47:26 +0100 |
---|---|---|
committer | bptato <nincsnevem662@gmail.com> | 2024-03-02 18:12:24 +0100 |
commit | 712d129700e7e70f93f379f8f8249ea57e573310 (patch) | |
tree | 7fb337028c210f80dabacf0b68cf71dff3de4d9c /lib | |
parent | 3bce5ee06cf475bf60c8fd65cae556fb708f88ea (diff) | |
download | chawan-712d129700e7e70f93f379f8f8249ea57e573310.tar.gz |
Improve Date.parse
- rewrite Date.parse() with separate parsers - return `NaN` for out of bounds field values as specified - accept up to 9 decimals for millisecond fraction but truncate at 3 - accept many more alternative date/time formats - add test cases in tests/test_builtin.js
Diffstat (limited to 'lib')
-rw-r--r-- | lib/quickjs/quickjs.c | 529 |
1 files changed, 305 insertions, 224 deletions
diff --git a/lib/quickjs/quickjs.c b/lib/quickjs/quickjs.c index 927f97d2..36720661 100644 --- a/lib/quickjs/quickjs.c +++ b/lib/quickjs/quickjs.c @@ -49873,7 +49873,7 @@ static JSValue js_Date_UTC(JSContext *ctx, JSValueConst this_val, int argc, JSValueConst *argv) { // UTC(y, mon, d, h, m, s, ms) - double fields[] = { 0, 0, 1, 0, 0, 0, 0 }; + double fields[9] = { 0, 0, 1, 0, 0, 0, 0, 0, 0 }; int i, n; double a; @@ -49894,145 +49894,338 @@ static JSValue js_Date_UTC(JSContext *ctx, JSValueConst this_val, return JS_NewFloat64(ctx, set_date_fields(fields, 0)); } -static void string_skip_spaces(JSString *sp, int *pp) { - while (*pp < sp->len && string_get(sp, *pp) == ' ') +/* Date string parsing */ + +static BOOL string_skip_char(const uint8_t *sp, int *pp, int c) { + if (sp[*pp] == c) { *pp += 1; + return TRUE; + } else { + return FALSE; + } } -static void string_skip_non_spaces(JSString *sp, int *pp) { - while (*pp < sp->len && string_get(sp, *pp) != ' ') +/* skip spaces, update offset */ +static void string_skip_spaces(const uint8_t *sp, int *pp) { + while (sp[*pp] == ' ') *pp += 1; } -/* parse a numeric field with an optional sign if accept_sign is TRUE */ -static int string_get_digits(JSString *sp, int *pp, int64_t *pval) { - int64_t v = 0; +/* skip dashes dots and commas */ +static void string_skip_separators(const uint8_t *sp, int *pp) { + int c; + while ((c = sp[*pp]) == '-' || c == '.' || c == ',') + *pp += 1; +} + +/* skip non spaces, update offset */ +static void string_skip_non_spaces(const uint8_t *sp, int *pp) { + while (sp[*pp] != '\0' && sp[*pp] != ' ') + *pp += 1; +} + +/* parse a numeric field (max_digits = 0 -> no maximum) */ +static BOOL string_get_digits(const uint8_t *sp, int *pp, int *pval, + int min_digits, int max_digits) +{ + int v = 0; int c, p = *pp, p_start; - if (p >= sp->len) - return -1; p_start = p; - while (p < sp->len) { - c = string_get(sp, p); - if (!(c >= '0' && c <= '9')) { - if (p == p_start) - return -1; - else - break; - } + while ((c = sp[p]) >= '0' && c <= '9') { v = v * 10 + c - '0'; p++; + if (p - p_start == max_digits) + break; } + if (p - p_start < min_digits) + return FALSE; *pval = v; *pp = p; - return 0; + return TRUE; } -static int string_get_signed_digits(JSString *sp, int *pp, int64_t *pval) { - int res, sgn, p = *pp; +static BOOL string_get_milliseconds(const uint8_t *sp, int *pp, int *pval) { + /* parse optional fractional part as milliseconds and truncate. */ + /* spec does not indicate which rounding should be used */ + int mul = 1000, ms = 0, c, p_start, p = *pp; - if (p >= sp->len) - return -1; - - sgn = string_get(sp, p); - if (sgn == '-' || sgn == '+') + c = sp[p]; + if (c == '.' || c == ',') { p++; - - res = string_get_digits(sp, &p, pval); - if (res == 0 && sgn == '-') { - if (*pval == 0) - return -1; // reject negative zero - *pval = -*pval; + p_start = p; + while ((c = sp[p]) >= '0' && c <= '9') { + ms += (c - '0') * (mul /= 10); + p++; + if (p - p_start == 9) + break; + } + if (p > p_start) { + /* only consume the separator if digits are present */ + *pval = ms; + *pp = p; + } } - *pp = p; - return res; + return TRUE; } -/* parse a fixed width numeric field */ -static int string_get_fixed_width_digits(JSString *sp, int *pp, int n, int64_t *pval) { - int64_t v = 0; - int i, c, p = *pp; +static BOOL string_get_timezone(const uint8_t *sp, int *pp, int *tzp) { + int tz = 0, sgn, hh, mm, p = *pp; - for(i = 0; i < n; i++) { - if (p >= sp->len) - return -1; - c = string_get(sp, p); - if (!(c >= '0' && c <= '9')) - return -1; - v = v * 10 + c - '0'; + sgn = sp[p]; + if (sgn == '+' || sgn == '-') { p++; + if (!string_get_digits(sp, &p, &hh, 2, 2)) + return FALSE; + string_skip_char(sp, &p, ':'); /* optional separator */ + if (!string_get_digits(sp, &p, &mm, 2, 2)) + return FALSE; + if (hh > 23 || mm > 59) + return FALSE; + tz = hh * 60 + mm; + if (sgn != '+') + tz = -tz; + } else + if (sgn == 'Z') { + p++; + } else { + return FALSE; } - *pval = v; *pp = p; - return 0; + *tzp = tz; + return TRUE; } -static int string_get_milliseconds(JSString *sp, int *pp, int64_t *pval) { - /* parse milliseconds as a fractional part, round to nearest */ - /* XXX: the spec does not indicate which rounding should be used */ - int mul = 1000, ms = 0, p = *pp, c, p_start; - if (p >= sp->len) - return -1; - p_start = p; - while (p < sp->len) { - c = string_get(sp, p); - if (!(c >= '0' && c <= '9')) { - if (p == p_start) - return -1; - else - break; - } - if (mul == 1 && c >= '5') - ms += 1; - ms += (c - '0') * (mul /= 10); +static BOOL string_match(const uint8_t *sp, int *pp, const char *s) { + int p = *pp; + while (*s != '\0') { + if (sp[p] != (uint8_t)*s++) + return FALSE; p++; } - *pval = ms; *pp = p; - return 0; + return TRUE; } - -static int find_abbrev(JSString *sp, int p, const char *list, int count) { +static int find_abbrev(const uint8_t *sp, int p, const char *list, int count) { int n, i; - if (p + 3 <= sp->len) { - for (n = 0; n < count; n++) { - for (i = 0; i < 3; i++) { - if (string_get(sp, p + i) != month_names[n * 3 + i]) - goto next; - } - return n; - next:; + for (n = 0; n < count; n++) { + for (i = 0;; i++) { + if (sp[p + i] != (uint8_t)month_names[n * 3 + i]) + break; + if (i == 2) + return n; } } return -1; } -static int string_get_month(JSString *sp, int *pp, int64_t *pval) { +static BOOL string_get_month(const uint8_t *sp, int *pp, int *pval) { int n; - string_skip_spaces(sp, pp); n = find_abbrev(sp, *pp, month_names, 12); if (n < 0) - return -1; + return FALSE; - *pval = n; + *pval = n + 1; *pp += 3; - return 0; + return TRUE; +} + +/* parse toISOString format */ +static BOOL js_date_parse_isostring(const uint8_t *sp, int fields[9], BOOL *is_local) { + int sgn, i, p = 0; + + /* initialize fields to the beginning of the Epoch */ + for (i = 0; i < 9; i++) { + fields[i] = (i == 2); + } + *is_local = FALSE; + + /* year is either yyyy digits or [+-]yyyyyy */ + sgn = sp[p]; + if (sgn == '-' || sgn == '+') { + p++; + if (!string_get_digits(sp, &p, &fields[0], 6, 6)) + return FALSE; + if (sgn == '-') { + if (fields[0] == 0) + return FALSE; // reject -000000 + fields[0] = -fields[0]; + } + } else { + if (!string_get_digits(sp, &p, &fields[0], 4, 4)) + return FALSE; + } + if (string_skip_char(sp, &p, '-')) { + if (!string_get_digits(sp, &p, &fields[1], 2, 2)) /* month */ + return FALSE; + if (fields[1] < 1) + return FALSE; + fields[1] -= 1; + if (string_skip_char(sp, &p, '-')) { + if (!string_get_digits(sp, &p, &fields[2], 2, 2)) /* day */ + return FALSE; + if (fields[2] < 1) + return FALSE; + } + } + if (string_skip_char(sp, &p, 'T')) { + *is_local = TRUE; + if (!string_get_digits(sp, &p, &fields[3], 2, 2) /* hour */ + || !string_skip_char(sp, &p, ':') + || !string_get_digits(sp, &p, &fields[4], 2, 2)) /* minute */ + return FALSE; + if (string_skip_char(sp, &p, ':')) { + if (!string_get_digits(sp, &p, &fields[5], 2, 2)) /* second */ + return FALSE; + string_get_milliseconds(sp, &p, &fields[6]); + } + } + /* parse the time zone offset if present: [+-]HH:mm or [+-]HHmm */ + if (sp[p]) { + *is_local = FALSE; + if (!string_get_timezone(sp, &p, &fields[8])) + return FALSE; + } + /* error if extraneous characters */ + return sp[p] == '\0'; +} + +/* parse toString, toUTCString and other formats */ +static BOOL js_date_parse_otherstring(const uint8_t *sp, int fields[9], BOOL *is_local) { + int c, i, val, p = 0, p_start; + int num[3]; + BOOL has_year = FALSE; + BOOL has_mon = FALSE; + BOOL has_time = FALSE; + int num_index = 0; + + /* initialize fields to the beginning of 2001-01-01 */ + fields[0] = 2001; + fields[1] = 1; + fields[2] = 1; + for (i = 3; i < 9; i++) { + fields[i] = 0; + } + *is_local = TRUE; + + while (sp[p] != '\0') { + string_skip_spaces(sp, &p); + p_start = p; + if ((c = sp[p]) == '+' || c == '-') { + if (has_time && string_get_timezone(sp, &p, &fields[8])) { + *is_local = FALSE; + } else { + p++; + if (string_get_digits(sp, &p, &val, 1, 9)) { + if (c == '-') { + if (val == 0) + return FALSE; + val = -val; + } + fields[0] = val; + has_year = TRUE; + } + } + } else + if (string_get_digits(sp, &p, &val, 1, 9)) { + if (string_skip_char(sp, &p, ':')) { + /* time part */ + fields[3] = val; + if (!string_get_digits(sp, &p, &fields[4], 1, 2)) + return FALSE; + if (string_skip_char(sp, &p, ':')) { + if (!string_get_digits(sp, &p, &fields[5], 1, 2)) + return FALSE; + string_get_milliseconds(sp, &p, &fields[6]); + } + has_time = TRUE; + } else { + if (p - p_start > 2) { + fields[0] = val; + has_year = TRUE; + } else + if (val < 1 || val > 31) { + fields[0] = val + (val < 100) * 1900; + has_year = TRUE; + } else { + if (num_index == 3) + return FALSE; + num[num_index++] = val; + } + } + } else + if (string_get_month(sp, &p, &fields[1])) { + has_mon = TRUE; + } else + if (c == 'Z') { + *is_local = FALSE; + p++; + continue; + } else + if (string_match(sp, &p, "GMT") || string_match(sp, &p, "UTC")) { + *is_local = FALSE; + continue; + } else { + /* skip a word */ + string_skip_non_spaces(sp, &p); + continue; + } + string_skip_separators(sp, &p); + } + if (num_index + has_year + has_mon > 3) + return FALSE; + + switch (num_index) { + case 0: + if (!has_year) + return FALSE; + break; + case 1: + if (has_mon) + fields[2] = num[0]; + else + fields[1] = num[0]; + break; + case 2: + if (has_year) { + fields[1] = num[0]; + fields[2] = num[1]; + } else + if (has_mon) { + fields[0] = num[1] + (num[1] < 100) * 1900; + fields[2] = num[0]; + } else { + fields[1] = num[0]; + fields[2] = num[1]; + } + break; + case 3: + fields[0] = num[2] + (num[2] < 100) * 1900; + fields[1] = num[0]; + fields[2] = num[1]; + break; + default: + return FALSE; + } + if (fields[1] < 1 || fields[2] < 1) + return FALSE; + fields[1] -= 1; + return TRUE; } static JSValue js_Date_parse(JSContext *ctx, JSValueConst this_val, int argc, JSValueConst *argv) { - // parse(s) JSValue s, rv; - int64_t fields[] = { 0, 1, 1, 0, 0, 0, 0 }; - double fields1[7]; - int64_t tz, hh, mm; + int fields[9]; + double fields1[9]; double d; - int p, i, c, sgn, l; + int i, c; JSString *sp; + uint8_t buf[128]; BOOL is_local; rv = JS_NAN; @@ -50042,145 +50235,33 @@ static JSValue js_Date_parse(JSContext *ctx, JSValueConst this_val, return JS_EXCEPTION; sp = JS_VALUE_GET_STRING(s); - p = 0; - if (p < sp->len && (((c = string_get(sp, p)) >= '0' && c <= '9') || c == '+' || c == '-')) { - /* ISO format */ - /* year field can be negative */ - if (string_get_signed_digits(sp, &p, &fields[0])) - goto done; - - for (i = 1; i < 7; i++) { - if (p >= sp->len) - break; - switch(i) { - case 1: - case 2: - c = '-'; - break; - case 3: - c = 'T'; - break; - case 4: - case 5: - c = ':'; - break; - case 6: - c = '.'; - break; - } - if (string_get(sp, p) != c) - break; - p++; - if (i == 6) { - if (string_get_milliseconds(sp, &p, &fields[i])) - goto done; - } else { - if (string_get_digits(sp, &p, &fields[i])) - goto done; - } - } - /* no time: UTC by default */ - is_local = (i > 3); - fields[1] -= 1; - - /* parse the time zone offset if present: [+-]HH:mm or [+-]HHmm */ - tz = 0; - if (p < sp->len) { - sgn = string_get(sp, p); - if (sgn == '+' || sgn == '-') { - p++; - l = sp->len - p; - if (l != 4 && l != 5) - goto done; - if (string_get_fixed_width_digits(sp, &p, 2, &hh)) - goto done; - if (l == 5) { - if (string_get(sp, p) != ':') - goto done; - p++; - } - if (string_get_fixed_width_digits(sp, &p, 2, &mm)) - goto done; - tz = hh * 60 + mm; - if (sgn == '-') - tz = -tz; - is_local = FALSE; - } else if (sgn == 'Z') { - p++; - is_local = FALSE; - } else { - goto done; - } - /* error if extraneous characters */ - if (p != sp->len) - goto done; - } - } else { - /* toString or toUTCString format */ - /* skip the day of the week */ - string_skip_non_spaces(sp, &p); - string_skip_spaces(sp, &p); - if (p >= sp->len) - goto done; - c = string_get(sp, p); - if (c >= '0' && c <= '9') { - /* day of month first */ - if (string_get_digits(sp, &p, &fields[2])) - goto done; - if (string_get_month(sp, &p, &fields[1])) - goto done; - } else { - /* month first */ - if (string_get_month(sp, &p, &fields[1])) - goto done; - string_skip_spaces(sp, &p); - if (string_get_digits(sp, &p, &fields[2])) - goto done; - } - /* year */ - string_skip_spaces(sp, &p); - if (string_get_signed_digits(sp, &p, &fields[0])) - goto done; - - /* hour, min, seconds */ - string_skip_spaces(sp, &p); - for(i = 0; i < 3; i++) { - if (i == 1 || i == 2) { - if (p >= sp->len) - goto done; - if (string_get(sp, p) != ':') - goto done; - p++; - } - if (string_get_digits(sp, &p, &fields[3 + i])) - goto done; - } - // XXX: parse optional milliseconds? - - /* parse the time zone offset if present: [+-]HHmm */ - is_local = FALSE; - tz = 0; - for (tz = 0; p < sp->len; p++) { - sgn = string_get(sp, p); - if (sgn == '+' || sgn == '-') { - p++; - if (string_get_fixed_width_digits(sp, &p, 2, &hh)) - goto done; - if (string_get_fixed_width_digits(sp, &p, 2, &mm)) - goto done; - tz = hh * 60 + mm; - if (sgn == '-') - tz = -tz; - break; - } + /* convert the string as a byte array */ + for (i = 0; i < sp->len && i < (int)countof(buf) - 1; i++) { + c = string_get(sp, i); + if (c > 255) + c = (c == 0x2212) ? '-' : 'x'; + buf[i] = c; + } + buf[i] = '\0'; + if (js_date_parse_isostring(buf, fields, &is_local) + || js_date_parse_otherstring(buf, fields, &is_local)) { + static int const field_max[6] = { 0, 11, 31, 24, 59, 59 }; + BOOL valid = TRUE; + /* check field maximum values */ + for (i = 1; i < 6; i++) { + if (fields[i] > field_max[i]) + valid = FALSE; + } + /* special case 24:00:00.000 */ + if (fields[3] == 24 && (fields[4] | fields[5] | fields[6])) + valid = FALSE; + if (valid) { + for(i = 0; i < 7; i++) + fields1[i] = fields[i]; + d = set_date_fields(fields1, is_local) - fields[8] * 60000; + rv = JS_NewFloat64(ctx, d); } } - for(i = 0; i < 7; i++) - fields1[i] = fields[i]; - d = set_date_fields(fields1, is_local) - tz * 60000; - rv = JS_NewFloat64(ctx, d); - -done: JS_FreeValue(ctx, s); return rv; } |