1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
|
diff --git a/posix/Makefile b/posix/Makefile
index 88d409f..2cacd21 100644
--- a/posix/Makefile
+++ b/posix/Makefile
@@ -86,7 +86,7 @@ tests := tstgetopt testfnm runtests runptests \
tst-rfc3484-3 \
tst-getaddrinfo3 tst-fnmatch2 tst-cpucount tst-cpuset \
bug-getopt1 bug-getopt2 bug-getopt3 bug-getopt4 \
- bug-getopt5 tst-getopt_long1
+ bug-getopt5 tst-getopt_long1 bug-regex34
xtests := bug-ga2
ifeq (yes,$(build-shared))
test-srcs := globtest
@@ -199,5 +199,6 @@ bug-regex26-ENV = LOCPATH=$(common-objpfx)localedata
bug-regex30-ENV = LOCPATH=$(common-objpfx)localedata
bug-regex32-ENV = LOCPATH=$(common-objpfx)localedata
bug-regex33-ENV = LOCPATH=$(common-objpfx)localedata
+bug-regex34-ENV = LOCPATH=$(common-objpfx)localedata
tst-rxspencer-ARGS = --utf8 rxspencer/tests
tst-rxspencer-ENV = LOCPATH=$(common-objpfx)localedata
diff --git a/posix/bug-regex34.c b/posix/bug-regex34.c
new file mode 100644
index 0000000..bb3b613
--- /dev/null
+++ b/posix/bug-regex34.c
@@ -0,0 +1,46 @@
+/* Test re_search with multi-byte characters in UTF-8.
+ Copyright (C) 2013 Free Software Foundation, Inc.
+ This file is part of the GNU C Library.
+
+ The GNU C Library is free software; you can redistribute it and/or
+ modify it under the terms of the GNU Lesser General Public
+ License as published by the Free Software Foundation; either
+ version 2.1 of the License, or (at your option) any later version.
+
+ The GNU C Library is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ Lesser General Public License for more details.
+
+ You should have received a copy of the GNU Lesser General Public
+ License along with the GNU C Library; if not, see
+ <http://www.gnu.org/licenses/>. */
+
+#define _GNU_SOURCE 1
+#include <stdio.h>
+#include <string.h>
+#include <locale.h>
+#include <regex.h>
+
+static int
+do_test (void)
+{
+ struct re_pattern_buffer r;
+ /* áá»á½ááºá¯ááºx */
+ const char *s = "\xe1\x80\x80\xe1\x80\xbb\xe1\x80\xbd\xe1\x80\x94\xe1\x80\xba\xe1\x80\xaf\xe1\x80\x95\xe1\x80\xbax";
+
+ if (setlocale (LC_ALL, "en_US.UTF-8") == NULL)
+ {
+ puts ("setlocale failed");
+ return 1;
+ }
+ memset (&r, 0, sizeof (r));
+
+ re_compile_pattern ("[^x]x", 5, &r);
+ /* This was triggering a buffer overflow. */
+ re_search (&r, s, strlen (s), 0, strlen (s), 0);
+ return 0;
+}
+
+#define TEST_FUNCTION do_test ()
+#include "../test-skeleton.c"
diff --git a/posix/regexec.c b/posix/regexec.c
index 7f2de85..5ca2bf6 100644
--- a/posix/regexec.c
+++ b/posix/regexec.c
@@ -197,7 +197,7 @@ static int group_nodes_into_DFAstates (const re_dfa_t *dfa,
static int check_node_accept (const re_match_context_t *mctx,
const re_token_t *node, int idx)
internal_function;
-static reg_errcode_t extend_buffers (re_match_context_t *mctx)
+static reg_errcode_t extend_buffers (re_match_context_t *mctx, int min_len)
internal_function;
/* Entry point for POSIX code. */
@@ -1160,7 +1160,7 @@ check_matching (re_match_context_t *mctx, int fl_longest_match,
|| (BE (next_char_idx >= mctx->input.valid_len, 0)
&& mctx->input.valid_len < mctx->input.len))
{
- err = extend_buffers (mctx);
+ err = extend_buffers (mctx, next_char_idx + 1);
if (BE (err != REG_NOERROR, 0))
{
assert (err == REG_ESPACE);
@@ -1738,7 +1738,7 @@ clean_state_log_if_needed (re_match_context_t *mctx, int next_state_log_idx)
&& mctx->input.valid_len < mctx->input.len))
{
reg_errcode_t err;
- err = extend_buffers (mctx);
+ err = extend_buffers (mctx, next_state_log_idx + 1);
if (BE (err != REG_NOERROR, 0))
return err;
}
@@ -2792,7 +2792,7 @@ get_subexp (re_match_context_t *mctx, int bkref_node, int bkref_str_idx)
if (bkref_str_off >= mctx->input.len)
break;
- err = extend_buffers (mctx);
+ err = extend_buffers (mctx, bkref_str_off + 1);
if (BE (err != REG_NOERROR, 0))
return err;
@@ -4102,7 +4102,7 @@ check_node_accept (const re_match_context_t *mctx, const re_token_t *node,
static reg_errcode_t
internal_function __attribute_warn_unused_result__
-extend_buffers (re_match_context_t *mctx)
+extend_buffers (re_match_context_t *mctx, int min_len)
{
reg_errcode_t ret;
re_string_t *pstr = &mctx->input;
@@ -4111,8 +4111,10 @@ extend_buffers (re_match_context_t *mctx)
if (BE (INT_MAX / 2 / sizeof (re_dfastate_t *) <= pstr->bufs_len, 0))
return REG_ESPACE;
- /* Double the lengthes of the buffers. */
- ret = re_string_realloc_buffers (pstr, MIN (pstr->len, pstr->bufs_len * 2));
+ /* Double the lengthes of the buffers, but allocate at least MIN_LEN. */
+ ret = re_string_realloc_buffers (pstr,
+ MAX (min_len,
+ MIN (pstr->len, pstr->bufs_len * 2)));
if (BE (ret != REG_NOERROR, 0))
return ret;
--
1.7.1
|