提交 7eaa76fc 编写于 作者: S Szabolcs Nagy 提交者: Rich Felker

regex: reject repetitions in some cases with REG_BADRPT

Previously repetitions were accepted after empty expressions such as
in (*|?)|{2}, but in BRE the handling of * and \{\} was not
consistent: they were accepted as literals in some cases and as
repetitions in others.

It is better to treat repetitions after an empty expression as an
error (this is allowed by the standard, and glibc mostly does the
same). This is hard to do consistently with the current logic so
the new rule is:

Reject repetitions after empty expressions, except after assertions
(^*, $?) and empty groups (()+), and never treat them as literals.

Empty alternation (|a) is undefined by the standard, but it can be
useful, so it should be accepted.
上级 a8cc2253
...@@ -837,6 +837,10 @@ static reg_errcode_t parse_atom(tre_parse_ctx_t *ctx, const char *s) ...@@ -837,6 +837,10 @@ static reg_errcode_t parse_atom(tre_parse_ctx_t *ctx, const char *s)
node = tre_ast_new_literal(ctx->mem, v, v, ctx->position++); node = tre_ast_new_literal(ctx->mem, v, v, ctx->position++);
s--; s--;
break; break;
case '{':
/* reject repetitions after empty expression in BRE */
if (!ere)
return REG_BADRPT;
default: default:
if (!ere && (unsigned)*s-'1' < 9) { if (!ere && (unsigned)*s-'1' < 9) {
/* back reference */ /* back reference */
...@@ -880,10 +884,14 @@ static reg_errcode_t parse_atom(tre_parse_ctx_t *ctx, const char *s) ...@@ -880,10 +884,14 @@ static reg_errcode_t parse_atom(tre_parse_ctx_t *ctx, const char *s)
s++; s++;
break; break;
case '*': case '*':
case '|': return REG_BADPAT;
case '{': case '{':
case '+': case '+':
case '?': case '?':
/* reject repetitions after empty expression in ERE */
if (ere)
return REG_BADRPT;
case '|':
if (!ere) if (!ere)
goto parse_literal; goto parse_literal;
case 0: case 0:
...@@ -964,8 +972,9 @@ static reg_errcode_t tre_parse(tre_parse_ctx_t *ctx) ...@@ -964,8 +972,9 @@ static reg_errcode_t tre_parse(tre_parse_ctx_t *ctx)
} }
parse_iter: parse_iter:
/* extension: repetitions are accepted after an empty node /* extension: repetitions are rejected after an empty node
eg. (+), ^*, a$?, a|{2} */ eg. (+), |*, {2}, but assertions are not treated as empty
so ^* or $? are accepted currently. */
switch (*s) { switch (*s) {
case '+': case '+':
case '?': case '?':
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册