diff options
Diffstat (limited to 'lib/regexec.c')
-rw-r--r-- | lib/regexec.c | 211 |
1 files changed, 102 insertions, 109 deletions
diff --git a/lib/regexec.c b/lib/regexec.c index a704c1b..d29d442 100644 --- a/lib/regexec.c +++ b/lib/regexec.c @@ -1,21 +1,21 @@ /* Extended regular expression matching and search library. - Copyright (C) 2002-2011 Free Software Foundation, Inc. + Copyright (C) 2002-2013 Free Software Foundation, Inc. This file is part of the GNU C Library. Contributed by Isamu Hasegawa <isamu@yamato.ibm.com>. - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 3, or (at your option) - any later version. + The GNU C Library is free software; you can redistribute it and/or + modify it under the terms of the GNU General Public + License as published by the Free Software Foundation; either + version 3 of the License, or (at your option) any later version. - This program is distributed in the hope that it will be useful, + The GNU C Library is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + General Public License for more details. - You should have received a copy of the GNU General Public License along - with this program; if not, write to the Free Software Foundation, - Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */ + You should have received a copy of the GNU General Public + License along with the GNU C Library; if not, see + <http://www.gnu.org/licenses/>. */ static reg_errcode_t match_ctx_init (re_match_context_t *cache, int eflags, Idx n) internal_function; @@ -51,9 +51,8 @@ static regoff_t re_search_stub (struct re_pattern_buffer *bufp, regoff_t range, Idx stop, struct re_registers *regs, bool ret_len) internal_function; -static unsigned int re_copy_regs (struct re_registers *regs, regmatch_t *pmatch, - Idx nregs, int regs_allocated) - internal_function; +static unsigned re_copy_regs (struct re_registers *regs, regmatch_t *pmatch, + Idx nregs, int regs_allocated) internal_function; static reg_errcode_t prune_impossible_nodes (re_match_context_t *mctx) internal_function; static Idx check_matching (re_match_context_t *mctx, bool fl_longest_match, @@ -200,7 +199,7 @@ static Idx group_nodes_into_DFAstates (const re_dfa_t *dfa, static bool check_node_accept (const re_match_context_t *mctx, const re_token_t *node, Idx idx) internal_function; -static reg_errcode_t extend_buffers (re_match_context_t *mctx) +static reg_errcode_t extend_buffers (re_match_context_t *mctx, int min_len) internal_function; /* Entry point for POSIX code. */ @@ -209,11 +208,11 @@ static reg_errcode_t extend_buffers (re_match_context_t *mctx) string STRING. If NMATCH is zero or REG_NOSUB was set in the cflags argument to - `regcomp', we ignore PMATCH. Otherwise, we assume PMATCH has at + 'regcomp', we ignore PMATCH. Otherwise, we assume PMATCH has at least NMATCH elements, and we set them to the offsets of the corresponding matched substrings. - EFLAGS specifies `execution flags' which affect matching: if + EFLAGS specifies "execution flags" which affect matching: if REG_NOTBOL is set, then ^ does not match at the beginning of the string; if REG_NOTEOL is set, then $ does not match at the end. @@ -229,9 +228,7 @@ regexec (preg, string, nmatch, pmatch, eflags) { reg_errcode_t err; Idx start, length; -#ifdef _LIBC - re_dfa_t *dfa = (re_dfa_t *) preg->buffer; -#endif + re_dfa_t *dfa = preg->buffer; if (eflags & ~(REG_NOTBOL | REG_NOTEOL | REG_STARTEND)) return REG_BADPAT; @@ -247,14 +244,14 @@ regexec (preg, string, nmatch, pmatch, eflags) length = strlen (string); } - __libc_lock_lock (dfa->lock); + lock_lock (dfa->lock); if (preg->no_sub) err = re_search_internal (preg, string, length, start, length, length, 0, NULL, eflags); else err = re_search_internal (preg, string, length, start, length, length, nmatch, pmatch, eflags); - __libc_lock_unlock (dfa->lock); + lock_unlock (dfa->lock); return err != REG_NOERROR; } @@ -365,7 +362,6 @@ weak_alias (__re_search_2, re_search_2) #endif static regoff_t -internal_function re_search_2_stub (struct re_pattern_buffer *bufp, const char *string1, Idx length1, const char *string2, Idx length2, @@ -413,7 +409,6 @@ re_search_2_stub (struct re_pattern_buffer *bufp, otherwise the position of the match is returned. */ static regoff_t -internal_function re_search_stub (struct re_pattern_buffer *bufp, const char *string, Idx length, Idx start, regoff_t range, Idx stop, struct re_registers *regs, @@ -424,9 +419,7 @@ re_search_stub (struct re_pattern_buffer *bufp, Idx nregs; regoff_t rval; int eflags = 0; -#ifdef _LIBC - re_dfa_t *dfa = (re_dfa_t *) bufp->buffer; -#endif + re_dfa_t *dfa = bufp->buffer; Idx last_start = start + range; /* Check for out-of-range. */ @@ -437,7 +430,7 @@ re_search_stub (struct re_pattern_buffer *bufp, else if (BE (last_start < 0 || (range < 0 && start <= last_start), 0)) last_start = 0; - __libc_lock_lock (dfa->lock); + lock_lock (dfa->lock); eflags |= (bufp->not_bol) ? REG_NOTBOL : 0; eflags |= (bufp->not_eol) ? REG_NOTEOL : 0; @@ -477,9 +470,9 @@ re_search_stub (struct re_pattern_buffer *bufp, rval = 0; - /* I hope we needn't fill ther regs with -1's when no match was found. */ + /* I hope we needn't fill their regs with -1's when no match was found. */ if (result != REG_NOERROR) - rval = -1; + rval = result == REG_NOMATCH ? -1 : -2; else if (regs != NULL) { /* If caller wants register contents data back, copy them. */ @@ -501,19 +494,18 @@ re_search_stub (struct re_pattern_buffer *bufp, } re_free (pmatch); out: - __libc_lock_unlock (dfa->lock); + lock_unlock (dfa->lock); return rval; } -static unsigned int -internal_function +static unsigned re_copy_regs (struct re_registers *regs, regmatch_t *pmatch, Idx nregs, int regs_allocated) { int rval = REGS_REALLOCATE; Idx i; Idx need_regs = nregs + 1; - /* We need one extra element beyond `num_regs' for the `-1' marker GNU code + /* We need one extra element beyond 'num_regs' for the '-1' marker GNU code uses. */ /* Have the register data arrays been allocated? */ @@ -636,7 +628,7 @@ re_exec (s) (0 <= LAST_START && LAST_START <= LENGTH) */ static reg_errcode_t -internal_function __attribute_warn_unused_result__ +__attribute_warn_unused_result__ re_search_internal (const regex_t *preg, const char *string, Idx length, Idx start, Idx last_start, Idx stop, @@ -644,7 +636,7 @@ re_search_internal (const regex_t *preg, int eflags) { reg_errcode_t err; - const re_dfa_t *dfa = (const re_dfa_t *) preg->buffer; + const re_dfa_t *dfa = preg->buffer; Idx left_lim, right_lim; int incr; bool fl_longest_match; @@ -719,7 +711,8 @@ re_search_internal (const regex_t *preg, if (nmatch > 1 || dfa->has_mb_node) { /* Avoid overflow. */ - if (BE (SIZE_MAX / sizeof (re_dfastate_t *) <= mctx.input.bufs_len, 0)) + if (BE ((MIN (IDX_MAX, SIZE_MAX / sizeof (re_dfastate_t *)) + <= mctx.input.bufs_len), 0)) { err = REG_ESPACE; goto free_return; @@ -739,7 +732,7 @@ re_search_internal (const regex_t *preg, mctx.input.tip_context = (eflags & REG_NOTBOL) ? CONTEXT_BEGBUF : CONTEXT_NEWLINE | CONTEXT_BEGBUF; - /* Check incrementally whether of not the input string match. */ + /* Check incrementally whether the input string matches. */ incr = (last_start < start) ? -1 : 1; left_lim = (last_start < start) ? last_start : start; right_lim = (last_start < start) ? start : last_start; @@ -921,7 +914,7 @@ re_search_internal (const regex_t *preg, goto free_return; } - /* At last, add the offset to the each registers, since we slided + /* At last, add the offset to each register, since we slid the buffers so that we could assume that the matching starts from 0. */ for (reg_idx = 0; reg_idx < nmatch; ++reg_idx) @@ -971,7 +964,7 @@ re_search_internal (const regex_t *preg, } static reg_errcode_t -internal_function __attribute_warn_unused_result__ +__attribute_warn_unused_result__ prune_impossible_nodes (re_match_context_t *mctx) { const re_dfa_t *const dfa = mctx->dfa; @@ -987,7 +980,7 @@ prune_impossible_nodes (re_match_context_t *mctx) halt_node = mctx->last_node; /* Avoid overflow. */ - if (BE (SIZE_MAX / sizeof (re_dfastate_t *) <= match_last, 0)) + if (BE (MIN (IDX_MAX, SIZE_MAX / sizeof (re_dfastate_t *)) <= match_last, 0)) return REG_ESPACE; sifted_states = re_malloc (re_dfastate_t *, match_last + 1); @@ -1067,7 +1060,7 @@ prune_impossible_nodes (re_match_context_t *mctx) since initial states may have constraints like "\<", "^", etc.. */ static inline re_dfastate_t * -__attribute ((always_inline)) internal_function +__attribute__ ((always_inline)) internal_function acquire_init_state_context (reg_errcode_t *err, const re_match_context_t *mctx, Idx idx) { @@ -1105,7 +1098,7 @@ acquire_init_state_context (reg_errcode_t *err, const re_match_context_t *mctx, FL_LONGEST_MATCH means we want the POSIX longest matching. If P_MATCH_FIRST is not NULL, and the match fails, it is set to the next place where we may want to try matching. - Note that the matcher assume that the maching starts from the current + Note that the matcher assumes that the matching starts from the current index of the buffer. */ static Idx @@ -1174,11 +1167,12 @@ check_matching (re_match_context_t *mctx, bool fl_longest_match, re_dfastate_t *old_state = cur_state; Idx next_char_idx = re_string_cur_idx (&mctx->input) + 1; - if (BE (next_char_idx >= mctx->input.bufs_len, 0) + if ((BE (next_char_idx >= mctx->input.bufs_len, 0) + && mctx->input.bufs_len < mctx->input.len) || (BE (next_char_idx >= mctx->input.valid_len, 0) && mctx->input.valid_len < mctx->input.len)) { - err = extend_buffers (mctx); + err = extend_buffers (mctx, next_char_idx + 1); if (BE (err != REG_NOERROR, 0)) { assert (err == REG_ESPACE); @@ -1435,7 +1429,7 @@ internal_function __attribute_warn_unused_result__ set_regs (const regex_t *preg, const re_match_context_t *mctx, size_t nmatch, regmatch_t *pmatch, bool fl_backtrack) { - const re_dfa_t *dfa = (const re_dfa_t *) preg->buffer; + const re_dfa_t *dfa = preg->buffer; Idx idx, cur_node; re_node_set eps_via_nodes; struct re_fail_stack_t *fs; @@ -1607,21 +1601,21 @@ update_regs (const re_dfa_t *dfa, regmatch_t *pmatch, and sift the nodes in each states according to the following rules. Updated state_log will be wrote to STATE_LOG. - Rules: We throw away the Node `a' in the STATE_LOG[STR_IDX] if... + Rules: We throw away the Node 'a' in the STATE_LOG[STR_IDX] if... 1. When STR_IDX == MATCH_LAST(the last index in the state_log): - If `a' isn't the LAST_NODE and `a' can't epsilon transit to - the LAST_NODE, we throw away the node `a'. - 2. When 0 <= STR_IDX < MATCH_LAST and `a' accepts - string `s' and transit to `b': + If 'a' isn't the LAST_NODE and 'a' can't epsilon transit to + the LAST_NODE, we throw away the node 'a'. + 2. When 0 <= STR_IDX < MATCH_LAST and 'a' accepts + string 's' and transit to 'b': i. If 'b' isn't in the STATE_LOG[STR_IDX+strlen('s')], we throw - away the node `a'. + away the node 'a'. ii. If 'b' is in the STATE_LOG[STR_IDX+strlen('s')] but 'b' is - thrown away, we throw away the node `a'. + thrown away, we throw away the node 'a'. 3. When 0 <= STR_IDX < MATCH_LAST and 'a' epsilon transit to 'b': i. If 'b' isn't in the STATE_LOG[STR_IDX], we throw away the - node `a'. + node 'a'. ii. If 'b' is in the STATE_LOG[STR_IDX] but 'b' is thrown away, - we throw away the node `a'. */ + we throw away the node 'a'. */ #define STATE_NODE_CONTAINS(state,node) \ ((state) != NULL && re_node_set_contains (&(state)->nodes, node)) @@ -1694,11 +1688,11 @@ build_sifted_states (const re_match_context_t *mctx, re_sift_context_t *sctx, Idx i; /* Then build the next sifted state. - We build the next sifted state on `cur_dest', and update - `sifted_states[str_idx]' with `cur_dest'. + We build the next sifted state on 'cur_dest', and update + 'sifted_states[str_idx]' with 'cur_dest'. Note: - `cur_dest' is the sifted state from `state_log[str_idx + 1]'. - `cur_src' points the node_set of the old `state_log[str_idx]' + 'cur_dest' is the sifted state from 'state_log[str_idx + 1]'. + 'cur_src' points the node_set of the old 'state_log[str_idx]' (with the epsilon nodes pre-filtered out). */ for (i = 0; i < cur_src->nelem; i++) { @@ -1711,7 +1705,7 @@ build_sifted_states (const re_match_context_t *mctx, re_sift_context_t *sctx, assert (!IS_EPSILON_NODE (type)); #endif #ifdef RE_ENABLE_I18N - /* If the node may accept `multi byte'. */ + /* If the node may accept "multi byte". */ if (dfa->nodes[prev_node].accept_mb) naccepted = sift_states_iter_mb (mctx, sctx, prev_node, str_idx, sctx->last_str_idx); @@ -1752,12 +1746,13 @@ clean_state_log_if_needed (re_match_context_t *mctx, Idx next_state_log_idx) { Idx top = mctx->state_log_top; - if (next_state_log_idx >= mctx->input.bufs_len + if ((next_state_log_idx >= mctx->input.bufs_len + && mctx->input.bufs_len < mctx->input.len) || (next_state_log_idx >= mctx->input.valid_len && mctx->input.valid_len < mctx->input.len)) { reg_errcode_t err; - err = extend_buffers (mctx); + err = extend_buffers (mctx, next_state_log_idx + 1); if (BE (err != REG_NOERROR, 0)) return err; } @@ -2267,17 +2262,17 @@ sift_states_iter_mb (const re_match_context_t *mctx, re_sift_context_t *sctx, { const re_dfa_t *const dfa = mctx->dfa; int naccepted; - /* Check the node can accept `multi byte'. */ + /* Check the node can accept "multi byte". */ naccepted = check_node_accept_bytes (dfa, node_idx, &mctx->input, str_idx); if (naccepted > 0 && str_idx + naccepted <= max_str_idx && !STATE_NODE_CONTAINS (sctx->sifted_states[str_idx + naccepted], dfa->nexts[node_idx])) - /* The node can't accept the `multi byte', or the + /* The node can't accept the "multi byte", or the destination was already thrown away, then the node - could't accept the current input `multi byte'. */ + could't accept the current input "multi byte". */ naccepted = 0; /* Otherwise, it is sure that the node could accept - `naccepted' bytes input. */ + 'naccepted' bytes input. */ return naccepted; } #endif /* RE_ENABLE_I18N */ @@ -2456,7 +2451,7 @@ find_recover_state (reg_errcode_t *err, re_match_context_t *mctx) /* From the node set CUR_NODES, pick up the nodes whose types are OP_OPEN_SUBEXP and which have corresponding back references in the regular expression. And register them to use them later for evaluating the - correspoding back references. */ + corresponding back references. */ static reg_errcode_t internal_function @@ -2567,7 +2562,7 @@ transit_state_mb (re_match_context_t *mctx, re_dfastate_t *pstate) if (naccepted == 0) continue; - /* The node can accepts `naccepted' bytes. */ + /* The node can accepts 'naccepted' bytes. */ dest_idx = re_string_cur_idx (&mctx->input) + naccepted; mctx->max_mb_elem_len = ((mctx->max_mb_elem_len < naccepted) ? naccepted : mctx->max_mb_elem_len); @@ -2619,7 +2614,7 @@ transit_state_bkref (re_match_context_t *mctx, const re_node_set *nodes) const re_token_t *node = dfa->nodes + node_idx; re_node_set *new_dest_nodes; - /* Check whether `node' is a backreference or not. */ + /* Check whether 'node' is a backreference or not. */ if (node->type != OP_BACK_REF) continue; @@ -2631,14 +2626,14 @@ transit_state_bkref (re_match_context_t *mctx, const re_node_set *nodes) continue; } - /* `node' is a backreference. + /* 'node' is a backreference. Check the substring which the substring matched. */ bkc_idx = mctx->nbkref_ents; err = get_subexp (mctx, node_idx, cur_str_idx); if (BE (err != REG_NOERROR, 0)) goto free_return; - /* And add the epsilon closures (which is `new_dest_nodes') of + /* And add the epsilon closures (which is 'new_dest_nodes') of the backreference to appropriate state_log. */ #ifdef DEBUG assert (dfa->nexts[node_idx] != REG_MISSING); @@ -2662,7 +2657,7 @@ transit_state_bkref (re_match_context_t *mctx, const re_node_set *nodes) dest_state = mctx->state_log[dest_str_idx]; prev_nelem = ((mctx->state_log[cur_str_idx] == NULL) ? 0 : mctx->state_log[cur_str_idx]->nodes.nelem); - /* Add `new_dest_node' to state_log. */ + /* Add 'new_dest_node' to state_log. */ if (dest_state == NULL) { mctx->state_log[dest_str_idx] @@ -2814,7 +2809,7 @@ get_subexp (re_match_context_t *mctx, Idx bkref_node, Idx bkref_str_idx) if (bkref_str_off >= mctx->input.len) break; - err = extend_buffers (mctx); + err = extend_buffers (mctx, bkref_str_off + 1); if (BE (err != REG_NOERROR, 0)) return err; @@ -2936,9 +2931,12 @@ check_arrival (re_match_context_t *mctx, state_array_t *path, Idx top_node, { re_dfastate_t **new_array; Idx old_alloc = path->alloc; - Idx new_alloc = old_alloc + last_str + mctx->max_mb_elem_len + 1; - if (BE (new_alloc < old_alloc, 0) - || BE (SIZE_MAX / sizeof (re_dfastate_t *) < new_alloc, 0)) + Idx incr_alloc = last_str + mctx->max_mb_elem_len + 1; + Idx new_alloc; + if (BE (IDX_MAX - old_alloc < incr_alloc, 0)) + return REG_ESPACE; + new_alloc = old_alloc + incr_alloc; + if (BE (SIZE_MAX / sizeof (re_dfastate_t *) < new_alloc, 0)) return REG_ESPACE; new_array = re_realloc (path->array, re_dfastate_t *, new_alloc); if (BE (new_array == NULL, 0)) @@ -3101,7 +3099,7 @@ check_arrival_add_next_nodes (re_match_context_t *mctx, Idx str_idx, assert (!IS_EPSILON_NODE (type)); #endif #ifdef RE_ENABLE_I18N - /* If the node may accept `multi byte'. */ + /* If the node may accept "multi byte". */ if (dfa->nodes[cur_node].accept_mb) { naccepted = check_node_accept_bytes (dfa, cur_node, &mctx->input, @@ -3358,7 +3356,7 @@ build_trtable (const re_dfa_t *dfa, re_dfastate_t *state) bitset_word_t elem, mask; bool dests_node_malloced = false; bool dest_states_malloced = false; - Idx ndests; /* Number of the destination states from `state'. */ + Idx ndests; /* Number of the destination states from 'state'. */ re_dfastate_t **trtable; re_dfastate_t **dest_states = NULL, **dest_states_word, **dest_states_nl; re_node_set follows, *dests_node; @@ -3372,8 +3370,8 @@ build_trtable (const re_dfa_t *dfa, re_dfastate_t *state) } *dests_alloc; /* We build DFA states which corresponds to the destination nodes - from `state'. `dests_node[i]' represents the nodes which i-th - destination state contains, and `dests_ch[i]' represents the + from 'state'. 'dests_node[i]' represents the nodes which i-th + destination state contains, and 'dests_ch[i]' represents the characters which i-th destination state accepts. */ if (__libc_use_alloca (sizeof (struct dests_alloc))) dests_alloc = (struct dests_alloc *) alloca (sizeof (struct dests_alloc)); @@ -3387,16 +3385,17 @@ build_trtable (const re_dfa_t *dfa, re_dfastate_t *state) dests_node = dests_alloc->dests_node; dests_ch = dests_alloc->dests_ch; - /* Initialize transiton table. */ + /* Initialize transition table. */ state->word_trtable = state->trtable = NULL; - /* At first, group all nodes belonging to `state' into several + /* At first, group all nodes belonging to 'state' into several destinations. */ ndests = group_nodes_into_DFAstates (dfa, state, dests_node, dests_ch); if (BE (! REG_VALID_NONZERO_INDEX (ndests), 0)) { if (dests_node_malloced) free (dests_alloc); + /* Return false in case of an error, true otherwise. */ if (ndests == 0) { state->trtable = (re_dfastate_t **) @@ -3592,13 +3591,13 @@ group_nodes_into_DFAstates (const re_dfa_t *dfa, const re_dfastate_t *state, reg_errcode_t err; bool ok; Idx i, j, k; - Idx ndests; /* Number of the destinations from `state'. */ + Idx ndests; /* Number of the destinations from 'state'. */ bitset_t accepts; /* Characters a node can accept. */ const re_node_set *cur_nodes = &state->nodes; bitset_empty (accepts); ndests = 0; - /* For all the nodes belonging to `state', */ + /* For all the nodes belonging to 'state', */ for (i = 0; i < cur_nodes->nelem; ++i) { re_token_t *node = &dfa->nodes[cur_nodes->elems[i]]; @@ -3641,7 +3640,7 @@ group_nodes_into_DFAstates (const re_dfa_t *dfa, const re_dfastate_t *state, else continue; - /* Check the `accepts' and sift the characters which are not + /* Check the 'accepts' and sift the characters which are not match it the context. */ if (constraint) { @@ -3700,7 +3699,7 @@ group_nodes_into_DFAstates (const re_dfa_t *dfa, const re_dfastate_t *state, } } - /* Then divide `accepts' into DFA states, or create a new + /* Then divide 'accepts' into DFA states, or create a new state. Above, we make sure that accepts is not empty. */ for (j = 0; j < ndests; ++j) { @@ -3713,7 +3712,7 @@ group_nodes_into_DFAstates (const re_dfa_t *dfa, const re_dfastate_t *state, if (type == CHARACTER && !bitset_contain (dests_ch[j], node->opr.c)) continue; - /* Enumerate the intersection set of this state and `accepts'. */ + /* Enumerate the intersection set of this state and 'accepts'. */ has_intersec = 0; for (k = 0; k < BITSET_WORDS; ++k) has_intersec |= intersec[k] = accepts[k] & dests_ch[j][k]; @@ -3721,7 +3720,7 @@ group_nodes_into_DFAstates (const re_dfa_t *dfa, const re_dfastate_t *state, if (!has_intersec) continue; - /* Then check if this state is a subset of `accepts'. */ + /* Then check if this state is a subset of 'accepts'. */ not_subset = not_consumed = 0; for (k = 0; k < BITSET_WORDS; ++k) { @@ -3729,8 +3728,8 @@ group_nodes_into_DFAstates (const re_dfa_t *dfa, const re_dfastate_t *state, not_consumed |= accepts[k] = accepts[k] & ~dests_ch[j][k]; } - /* If this state isn't a subset of `accepts', create a - new group state, which has the `remains'. */ + /* If this state isn't a subset of 'accepts', create a + new group state, which has the 'remains'. */ if (not_subset) { bitset_copy (dests_ch[ndests], remains); @@ -3769,7 +3768,7 @@ group_nodes_into_DFAstates (const re_dfa_t *dfa, const re_dfastate_t *state, } #ifdef RE_ENABLE_I18N -/* Check how many bytes the node `dfa->nodes[node_idx]' accepts. +/* Check how many bytes the node 'dfa->nodes[node_idx]' accepts. Return the number of the bytes the node accepts. STR_IDX is the current index of the input string. @@ -3896,7 +3895,6 @@ check_node_accept_bytes (const re_dfa_t *dfa, Idx node_idx, const int32_t *table, *indirect; const unsigned char *weights, *extra; const char *collseqwc; - int32_t idx; /* This #include defines a local function! */ # include <locale/weight.h> @@ -3934,6 +3932,7 @@ check_node_accept_bytes (const re_dfa_t *dfa, Idx node_idx, in_collseq = find_collation_sequence_value (pin, elem_len); } /* match with range expression? */ + /* FIXME: Implement rational ranges here, too. */ for (i = 0; i < cset->nranges; ++i) if (cset->range_starts[i] <= in_collseq && in_collseq <= cset->range_ends[i]) @@ -3954,7 +3953,7 @@ check_node_accept_bytes (const re_dfa_t *dfa, Idx node_idx, _NL_CURRENT (LC_COLLATE, _NL_COLLATE_EXTRAMB); indirect = (const int32_t *) _NL_CURRENT (LC_COLLATE, _NL_COLLATE_INDIRECTMB); - int32_t idx = findidx (&cp); + int32_t idx = findidx (&cp, elem_len); if (idx > 0) for (i = 0; i < cset->nequiv_classes; ++i) { @@ -3985,18 +3984,9 @@ check_node_accept_bytes (const re_dfa_t *dfa, Idx node_idx, # endif /* _LIBC */ { /* match with range expression? */ -#if __GNUC__ >= 2 && ! (__STDC_VERSION__ < 199901L && __STRICT_ANSI__) - wchar_t cmp_buf[] = {L'\0', L'\0', wc, L'\0', L'\0', L'\0'}; -#else - wchar_t cmp_buf[] = {L'\0', L'\0', L'\0', L'\0', L'\0', L'\0'}; - cmp_buf[2] = wc; -#endif for (i = 0; i < cset->nranges; ++i) { - cmp_buf[0] = cset->range_starts[i]; - cmp_buf[4] = cset->range_ends[i]; - if (wcscoll (cmp_buf, cmp_buf + 2) <= 0 - && wcscoll (cmp_buf + 2, cmp_buf + 4) <= 0) + if (cset->range_starts[i] <= wc && wc <= cset->range_ends[i]) { match_len = char_len; goto check_node_accept_bytes_match; @@ -4066,7 +4056,7 @@ find_collation_sequence_value (const unsigned char *mbs, size_t mbs_len) /* Skip the collation sequence value. */ idx += sizeof (uint32_t); /* Skip the wide char sequence of the collating element. */ - idx = idx + sizeof (uint32_t) * (extra[idx] + 1); + idx = idx + sizeof (uint32_t) * (*(int32_t *) (extra + idx) + 1); /* If we found the entry, return the sequence value. */ if (found) return *(uint32_t *) (extra + idx); @@ -4134,17 +4124,20 @@ check_node_accept (const re_match_context_t *mctx, const re_token_t *node, static reg_errcode_t internal_function __attribute_warn_unused_result__ -extend_buffers (re_match_context_t *mctx) +extend_buffers (re_match_context_t *mctx, int min_len) { reg_errcode_t ret; re_string_t *pstr = &mctx->input; /* Avoid overflow. */ - if (BE (SIZE_MAX / 2 / sizeof (re_dfastate_t *) <= pstr->bufs_len, 0)) + if (BE (MIN (IDX_MAX, SIZE_MAX / sizeof (re_dfastate_t *)) / 2 + <= pstr->bufs_len, 0)) return REG_ESPACE; - /* Double the lengthes of the buffers. */ - ret = re_string_realloc_buffers (pstr, pstr->bufs_len * 2); + /* Double the lengths of the buffers, but allocate at least MIN_LEN. */ + ret = re_string_realloc_buffers (pstr, + MAX (min_len, + MIN (pstr->len, pstr->bufs_len * 2))); if (BE (ret != REG_NOERROR, 0)) return ret; @@ -4207,7 +4200,7 @@ match_ctx_init (re_match_context_t *mctx, int eflags, Idx n) size_t max_object_size = MAX (sizeof (struct re_backref_cache_entry), sizeof (re_sub_match_top_t *)); - if (BE (SIZE_MAX / max_object_size < n, 0)) + if (BE (MIN (IDX_MAX, SIZE_MAX / max_object_size) < n, 0)) return REG_ESPACE; mctx->bkref_ents = re_malloc (struct re_backref_cache_entry, n); |