diff options
Diffstat (limited to 'lib/quickjs/libregexp.c')
-rw-r--r-- | lib/quickjs/libregexp.c | 98 |
1 files changed, 49 insertions, 49 deletions
diff --git a/lib/quickjs/libregexp.c b/lib/quickjs/libregexp.c index 34a8c013..6c3ee19e 100644 --- a/lib/quickjs/libregexp.c +++ b/lib/quickjs/libregexp.c @@ -1,6 +1,6 @@ /* * Regular Expression Engine - * + * * Copyright (c) 2017-2018 Fabrice Bellard * * Permission is hereby granted, free of charge, to any person obtaining a copy @@ -196,7 +196,7 @@ static int cr_init_char_range(REParseState *s, CharRange *cr, uint32_t c) BOOL invert; const uint16_t *c_pt; int len, i; - + invert = c & 1; c_pt = char_range_table[c >> 1]; len = *c_pt++; @@ -221,7 +221,7 @@ static __maybe_unused void lre_dump_bytecode(const uint8_t *buf, { int pos, len, opcode, bc_len, re_flags, i; uint32_t val; - + assert(buf_len >= RE_HEADER_LEN); re_flags= buf[0]; @@ -392,7 +392,7 @@ static int parse_digits(const uint8_t **pp, BOOL allow_overflow) const uint8_t *p; uint64_t v; int c; - + p = *pp; v = 0; for(;;) { @@ -464,7 +464,7 @@ int lre_parse_escape(const uint8_t **pp, int allow_utf16) { int h, n, i; uint32_t c1; - + if (*p == '{' && allow_utf16) { p++; c = 0; @@ -658,7 +658,7 @@ static int get_class_atom(REParseState *s, CharRange *cr, const uint8_t *p; uint32_t c; int ret; - + p = *pp; c = *p; @@ -766,7 +766,7 @@ static int re_emit_range(REParseState *s, const CharRange *cr) { int len, i; uint32_t high; - + len = (unsigned)cr->len / 2; if (len >= 65535) return re_parse_error(s, "too many ranges"); @@ -807,7 +807,7 @@ static int re_parse_char_class(REParseState *s, const uint8_t **pp) CharRange cr_s, *cr = &cr_s; CharRange cr1_s, *cr1 = &cr1_s; BOOL invert; - + cr_init(cr, s->opaque, lre_realloc); p = *pp; p++; /* skip '[' */ @@ -895,7 +895,7 @@ static BOOL re_need_check_advance(const uint8_t *bc_buf, int bc_buf_len) int pos, opcode, len; uint32_t val; BOOL ret; - + ret = TRUE; pos = 0; while (pos < bc_buf_len) { @@ -948,7 +948,7 @@ static int re_is_simple_quantifier(const uint8_t *bc_buf, int bc_buf_len) { int pos, opcode, len, count; uint32_t val; - + count = 0; pos = 0; while (pos < bc_buf_len) { @@ -1113,7 +1113,7 @@ static int find_group_name(REParseState *s, const char *name) const char *p, *buf_end; size_t len, name_len; int capture_index; - + name_len = strlen(name); p = (char *)s->group_names.buf; buf_end = (char *)s->group_names.buf + s->group_names.size; @@ -1136,7 +1136,7 @@ static int re_parse_term(REParseState *s, BOOL is_backward_dir) int c, last_atom_start, quant_min, quant_max, last_capture_count; BOOL greedy, add_zero_advance_check, is_neg, is_backward_lookahead; CharRange cr_s, *cr = &cr_s; - + last_atom_start = -1; last_capture_count = 0; p = s->buf_ptr; @@ -1259,15 +1259,15 @@ static int re_parse_term(REParseState *s, BOOL is_backward_dir) capture_index = s->capture_count++; re_emit_op_u8(s, REOP_save_start + is_backward_dir, capture_index); - + s->buf_ptr = p; if (re_parse_disjunction(s, is_backward_dir)) return -1; p = s->buf_ptr; - + re_emit_op_u8(s, REOP_save_start + 1 - is_backward_dir, capture_index); - + if (re_parse_expect(s, &p, ')')) return -1; } @@ -1283,7 +1283,7 @@ static int re_parse_term(REParseState *s, BOOL is_backward_dir) { const uint8_t *p1; int dummy_res; - + p1 = p; if (p1[2] != '<') { /* annex B: we tolerate invalid group names in non @@ -1336,10 +1336,10 @@ static int re_parse_term(REParseState *s, BOOL is_backward_dir) goto normal_char; case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': - case '9': + case '9': { const uint8_t *q = ++p; - + c = parse_digits(&p, FALSE); if (c < 0 || (c >= s->capture_count && c >= re_count_captures(s))) { if (!s->is_utf16) { @@ -1480,7 +1480,7 @@ static int re_parse_term(REParseState *s, BOOL is_backward_dir) } if (greedy) { int len, pos; - + if (quant_max > 0) { /* specific optimization for simple quantifiers */ if (dbuf_error(&s->byte_code)) @@ -1489,7 +1489,7 @@ static int re_parse_term(REParseState *s, BOOL is_backward_dir) s->byte_code.size - last_atom_start); if (len > 0) { re_emit_op(s, REOP_match); - + if (dbuf_insert(&s->byte_code, last_atom_start, 17)) goto out_of_memory; pos = last_atom_start; @@ -1506,7 +1506,7 @@ static int re_parse_term(REParseState *s, BOOL is_backward_dir) goto done; } } - + if (dbuf_error(&s->byte_code)) goto out_of_memory; /* the spec tells that if there is no advance when @@ -1518,7 +1518,7 @@ static int re_parse_term(REParseState *s, BOOL is_backward_dir) } else { add_zero_advance_check = FALSE; } - + { int len, pos; len = s->byte_code.size - last_atom_start; @@ -1544,7 +1544,7 @@ static int re_parse_term(REParseState *s, BOOL is_backward_dir) len + 5 * has_goto + add_zero_advance_check * 2); if (add_zero_advance_check) { s->byte_code.buf[last_atom_start + 1 + 4] = REOP_push_char_pos; - re_emit_op(s, REOP_check_advance); + re_emit_op(s, REOP_check_advance); } if (has_goto) re_emit_goto(s, REOP_goto, last_atom_start); @@ -1560,7 +1560,7 @@ static int re_parse_term(REParseState *s, BOOL is_backward_dir) pos += 4; if (add_zero_advance_check) { s->byte_code.buf[pos++] = REOP_push_char_pos; - re_emit_op(s, REOP_check_advance); + re_emit_op(s, REOP_check_advance); } re_emit_goto(s, REOP_loop, last_atom_start + 5); re_emit_op(s, REOP_drop); @@ -1655,14 +1655,14 @@ static int re_parse_alternative(REParseState *s, BOOL is_backward_dir) } return 0; } - + static int re_parse_disjunction(REParseState *s, BOOL is_backward_dir) { int start, len, pos; if (lre_check_stack_overflow(s->opaque, 0)) return re_parse_error(s, "stack overflow"); - + start = s->byte_code.size; if (re_parse_alternative(s, is_backward_dir)) return -1; @@ -1682,7 +1682,7 @@ static int re_parse_disjunction(REParseState *s, BOOL is_backward_dir) if (re_parse_alternative(s, is_backward_dir)) return -1; - + /* patch the goto */ len = s->byte_code.size - (pos + 4); put_u32(s->byte_code.buf + pos, len); @@ -1695,7 +1695,7 @@ static int compute_stack_size(const uint8_t *bc_buf, int bc_buf_len) { int stack_size, stack_size_max, pos, opcode, len; uint32_t val; - + stack_size = 0; stack_size_max = 0; bc_buf += RE_HEADER_LEN; @@ -1746,7 +1746,7 @@ uint8_t *lre_compile(int *plen, char *error_msg, int error_msg_size, REParseState s_s, *s = &s_s; int stack_size; BOOL is_sticky; - + memset(s, 0, sizeof(*s)); s->opaque = opaque; s->buf_ptr = (const uint8_t *)buf; @@ -1760,7 +1760,7 @@ uint8_t *lre_compile(int *plen, char *error_msg, int error_msg_size, s->capture_count = 1; s->total_capture_count = -1; s->has_named_captures = -1; - + dbuf_init2(&s->byte_code, opaque, lre_realloc); dbuf_init2(&s->group_names, opaque, lre_realloc); @@ -1768,7 +1768,7 @@ uint8_t *lre_compile(int *plen, char *error_msg, int error_msg_size, dbuf_putc(&s->byte_code, 0); /* second element is the number of captures */ dbuf_putc(&s->byte_code, 0); /* stack size */ dbuf_put_u32(&s->byte_code, 0); /* bytecode length */ - + if (!is_sticky) { /* iterate thru all positions (about the same as .*?( ... ) ) . We do it without an explicit loop so that lock step @@ -1790,7 +1790,7 @@ uint8_t *lre_compile(int *plen, char *error_msg, int error_msg_size, } re_emit_op_u8(s, REOP_save_end, 0); - + re_emit_op(s, REOP_match); if (*s->buf_ptr != '\0') { @@ -1802,13 +1802,13 @@ uint8_t *lre_compile(int *plen, char *error_msg, int error_msg_size, re_parse_out_of_memory(s); goto error; } - + stack_size = compute_stack_size(s->byte_code.buf, s->byte_code.size); if (stack_size < 0) { re_parse_error(s, "too many imbricated quantifiers"); goto error; } - + s->byte_code.buf[RE_HEADER_CAPTURE_COUNT] = s->capture_count; s->byte_code.buf[RE_HEADER_STACK_SIZE] = stack_size; put_u32(s->byte_code.buf + 3, s->byte_code.size - RE_HEADER_LEN); @@ -1819,11 +1819,11 @@ uint8_t *lre_compile(int *plen, char *error_msg, int error_msg_size, s->byte_code.buf[RE_HEADER_FLAGS] |= LRE_FLAG_NAMED_GROUPS; } dbuf_free(&s->group_names); - + #ifdef DUMP_REOP lre_dump_bytecode(s->byte_code.buf, s->byte_code.size); #endif - + error_msg[0] = '\0'; *plen = s->byte_code.size; return s->byte_code.buf; @@ -1974,7 +1974,7 @@ typedef struct { const uint8_t *cbuf_end; /* 0 = 8 bit chars, 1 = 16 bit chars, 2 = 16 bit chars, UTF-16, 3 = 8 bit chars, UTF-8 */ - int cbuf_type; + int cbuf_type; int capture_count; int stack_size_max; BOOL multi_line; @@ -2036,7 +2036,7 @@ static intptr_t lre_exec_backtrack(REExecContext *s, uint8_t **capture, int cbuf_type; uint32_t val, c; const uint8_t *cbuf_end; - + cbuf_type = s->cbuf_type; cbuf_end = s->cbuf_end; @@ -2134,7 +2134,7 @@ static intptr_t lre_exec_backtrack(REExecContext *s, uint8_t **capture, case REOP_split_next_first: { const uint8_t *pc1; - + val = get_u32(pc); pc += 4; if (opcode == REOP_split_next_first) { @@ -2160,7 +2160,7 @@ static intptr_t lre_exec_backtrack(REExecContext *s, uint8_t **capture, if (ret < 0) return -1; break; - + case REOP_goto: val = get_u32(pc); pc += 4 + (int)val; @@ -2264,7 +2264,7 @@ static intptr_t lre_exec_backtrack(REExecContext *s, uint8_t **capture, { const uint8_t *cptr1, *cptr1_end, *cptr1_start; uint32_t c1, c2; - + val = *pc++; if (val >= s->capture_count) goto no_match; @@ -2307,7 +2307,7 @@ static intptr_t lre_exec_backtrack(REExecContext *s, uint8_t **capture, { int n; uint32_t low, high, idx_min, idx_max, idx; - + n = get_u16(pc); /* n must be >= 1 */ pc += 2; if (cptr >= cbuf_end) @@ -2347,7 +2347,7 @@ static intptr_t lre_exec_backtrack(REExecContext *s, uint8_t **capture, { int n; uint32_t low, high, idx_min, idx_max, idx; - + n = get_u16(pc); /* n must be >= 1 */ pc += 2; if (cptr >= cbuf_end) @@ -2392,14 +2392,14 @@ static intptr_t lre_exec_backtrack(REExecContext *s, uint8_t **capture, size_t q; intptr_t res; const uint8_t *pc1; - + next_pos = get_u32(pc); quant_min = get_u32(pc + 4); quant_max = get_u32(pc + 8); pc += 16; pc1 = pc; pc += (int)next_pos; - + q = 0; for(;;) { res = lre_exec_backtrack(s, capture, stack, stack_len, @@ -2442,7 +2442,7 @@ int lre_exec(uint8_t **capture, REExecContext s_s, *s = &s_s; int re_flags, i, alloca_size, ret, cbuf_width = cbuf_type; StackInt *stack_buf; - + if (cbuf_width == 3) /* UTF-8 */ cbuf_width = 0; re_flags = bc_buf[RE_HEADER_FLAGS]; @@ -2464,7 +2464,7 @@ int lre_exec(uint8_t **capture, s->state_stack = NULL; s->state_stack_len = 0; s->state_stack_size = 0; - + for(i = 0; i < s->capture_count * 2; i++) capture[i] = NULL; alloca_size = s->stack_size_max * sizeof(stack_buf[0]); @@ -2516,7 +2516,7 @@ int main(int argc, char **argv) uint8_t *capture[CAPTURE_COUNT_MAX * 2]; const char *input; int input_len, capture_count; - + if (argc < 3) { printf("usage: %s regexp input\n", argv[0]); exit(1); @@ -2530,7 +2530,7 @@ int main(int argc, char **argv) input = argv[2]; input_len = strlen(input); - + ret = lre_exec(capture, bc, (uint8_t *)input, 0, input_len, 0, NULL); printf("ret=%d\n", ret); if (ret == 1) { |