@ -0,0 +1,43 @@ |
|||||
|
Sun Jan 28 21:08:45 EET 2007 Ville Laurikari <vl@iki.fi> |
||||
|
* Fixed regoff_t documentation for wide characters. |
||||
|
The documentation erroneously claimed that offsets are always given in |
||||
|
bytes (they are bytes in byte and multibyte strings, but wchar_t |
||||
|
offsets in wchar_t strings). |
||||
|
|
||||
|
Thanks to Gregory Sharp for pointing this out. |
||||
|
diff -rN -u old-stable/doc/tre-api.html new-stable/doc/tre-api.html
|
||||
|
--- old-stable/doc/tre-api.html 2007-11-04 20:25:00.000000000 +0200
|
||||
|
+++ new-stable/doc/tre-api.html 2007-11-04 20:25:00.000000000 +0200
|
||||
|
@@ -323,22 +323,21 @@
|
||||
|
<dl> |
||||
|
<dt><tt><font class="type">regoff_t</font> <font |
||||
|
class="arg">rm_so</font></tt></dt> |
||||
|
-<dd>Byte offset from start of <tt><font class="arg">string</font></tt>
|
||||
|
-to start of substring. </dd>
|
||||
|
+<dd>Offset from start of <tt><font class="arg">string</font></tt> to start of
|
||||
|
+substring. </dd>
|
||||
|
<dt><tt><font class="type">regoff_t</font> <font |
||||
|
class="arg">rm_eo</font></tt></dt> |
||||
|
-<dd>Byte offset from start of <tt><font class="arg">string</font></tt>
|
||||
|
-to the first character after the substring. </dd>
|
||||
|
+<dd>Offset from start of <tt><font class="arg">string</font></tt> to the first
|
||||
|
+character after the substring. </dd>
|
||||
|
</dl> |
||||
|
</blockquote> |
||||
|
|
||||
|
<p> |
||||
|
-The length of a submatch in bytes can be computed by subtracting
|
||||
|
-<code>rm_eo</code> and <code>rm_so</code>.
|
||||
|
-If a parenthesized subexpression did not participate in a match, the
|
||||
|
-<code>rm_so</code> and <code>rm_eo</code> fields for the corresponding
|
||||
|
-<code>pmatch</code> element are set to <code>-1</code>.
|
||||
|
-When a multibyte character set is in effect, the submatch offsets are
|
||||
|
+The length of a submatch can be computed by subtracting <code>rm_so</code> from
|
||||
|
+<code>rm_eo</code>. If a parenthesized subexpression did not participate in a
|
||||
|
+match, the <code>rm_so</code> and <code>rm_eo</code> fields for the
|
||||
|
+corresponding <code>pmatch</code> element are set to <code>-1</code>. Note
|
||||
|
+that when a multibyte character set is in effect, the submatch offsets are
|
||||
|
given as byte offsets, not character offsets. |
||||
|
</p> |
||||
|
|
||||
|
|
@ -0,0 +1,31 @@ |
|||||
|
Sun Nov 4 18:47:56 EET 2007 Ville Laurikari <vl@iki.fi> |
||||
|
* Fixed a bug in \<. |
||||
|
\< always matched at the beginning of the string. Thanks to Shmuel |
||||
|
Zeigerman for the bug report. |
||||
|
|
||||
|
See http://laurikari.net/pipermail/tre-general/2007-February/000128.html |
||||
|
diff -rN -u old-stable/lib/tre-match-utils.h new-stable/lib/tre-match-utils.h
|
||||
|
--- old-stable/lib/tre-match-utils.h 2007-11-04 20:30:23.000000000 +0200
|
||||
|
+++ new-stable/lib/tre-match-utils.h 2007-11-04 20:30:23.000000000 +0200
|
||||
|
@@ -161,7 +161,7 @@
|
||||
|
&& (next_c != L'\0' || reg_noteol) \ |
||||
|
&& (next_c != L'\n' || !reg_newline)) \ |
||||
|
|| ((assertions & ASSERT_AT_BOW) \ |
||||
|
- && (pos > 0 && (IS_WORD_CHAR(prev_c) || !IS_WORD_CHAR(next_c)))) \
|
||||
|
+ && (IS_WORD_CHAR(prev_c) || !IS_WORD_CHAR(next_c))) \
|
||||
|
|| ((assertions & ASSERT_AT_EOW) \ |
||||
|
&& (!IS_WORD_CHAR(prev_c) || IS_WORD_CHAR(next_c))) \ |
||||
|
|| ((assertions & ASSERT_AT_WB) \ |
||||
|
diff -rN -u old-stable/tests/retest.c new-stable/tests/retest.c
|
||||
|
--- old-stable/tests/retest.c 2007-11-04 20:30:23.000000000 +0200
|
||||
|
+++ new-stable/tests/retest.c 2007-11-04 20:30:23.000000000 +0200
|
||||
|
@@ -1121,6 +1121,8 @@
|
||||
|
test_exec("aax xxa", 0, REG_OK, 2, 3, END); |
||||
|
test_comp("\\Bx\\b", REG_EXTENDED, 0); |
||||
|
test_exec("aax xxx", 0, REG_OK, 2, 3, END); |
||||
|
+ test_comp("\\<.", REG_EXTENDED, 0);
|
||||
|
+ test_exec(";xaa", 0, REG_OK, 1, 2, END);
|
||||
|
|
||||
|
/* Shorthands for character classes. */ |
||||
|
test_comp("\\w+", REG_EXTENDED, 0); |
||||
|
|
@ -0,0 +1,137 @@ |
|||||
|
Fri Mar 16 19:18:02 EET 2007 Ville Laurikari <vl@iki.fi> |
||||
|
* Refactoring. |
||||
|
diff -rN -u old-stable/lib/tre-compile.c new-stable/lib/tre-compile.c
|
||||
|
--- old-stable/lib/tre-compile.c 2007-11-04 20:27:45.000000000 +0200
|
||||
|
+++ new-stable/lib/tre-compile.c 2007-11-04 20:27:45.000000000 +0200
|
||||
|
@@ -1,7 +1,7 @@
|
||||
|
/* |
||||
|
tre-compile.c - TRE regex compiler |
||||
|
|
||||
|
- Copyright (c) 2001-2006 Ville Laurikari <vl@iki.fi>
|
||||
|
+ Copyright (c) 2001-2007 Ville Laurikari <vl@iki.fi>
|
||||
|
|
||||
|
This library is free software; you can redistribute it and/or |
||||
|
modify it under the terms of the GNU Lesser General Public |
||||
|
@@ -127,6 +127,30 @@
|
||||
|
int next_tag; |
||||
|
} tre_tag_states_t; |
||||
|
|
||||
|
+
|
||||
|
+/* Record `tag' as the start- or end-offset tag of every submatch listed in
|
||||
|
+ `regset' (terminated by a negative entry), then mark the set empty. */
|
||||
|
+static void
|
||||
|
+tre_purge_regset(int *regset, tre_tnfa_t *tnfa, int tag)
|
||||
|
+{
|
||||
|
+ int i;
|
||||
|
+
|
||||
|
+ for (i = 0; regset[i] >= 0; i++)
|
||||
|
+ {
|
||||
|
+ int id = regset[i] / 2;
|
||||
|
+ int start = !(regset[i] % 2);
|
||||
|
+ DPRINT((" Using tag %d for %s offset of "
|
||||
|
+ "submatch %d\n", tag,
|
||||
|
+ start ? "start" : "end", id));
|
||||
|
+ if (start)
|
||||
|
+ tnfa->submatch_data[id].so_tag = tag;
|
||||
|
+ else
|
||||
|
+ tnfa->submatch_data[id].eo_tag = tag;
|
||||
|
+ }
|
||||
|
+ regset[0] = -1;
|
||||
|
+}
|
||||
|
+
|
||||
|
+
|
||||
|
/* Adds tags to appropriate locations in the parse tree in `tree', so that |
||||
|
subexpressions marked for submatch addressing can be traced. */ |
||||
|
static reg_errcode_t |
||||
|
@@ -281,20 +305,7 @@
|
||||
|
minimal_tag = -1; |
||||
|
num_minimals++; |
||||
|
} |
||||
|
- /* Go through the regset and set submatch data for
|
||||
|
- submatches that are using this tag. */
|
||||
|
- for (i = 0; regset[i] >= 0; i++)
|
||||
|
- {
|
||||
|
- int id = regset[i] / 2;
|
||||
|
- int start = !(regset[i] % 2);
|
||||
|
- DPRINT((" Using tag %d for %s offset of "
|
||||
|
- "submatch %d\n", tag,
|
||||
|
- start ? "start" : "end", id));
|
||||
|
- if (start)
|
||||
|
- tnfa->submatch_data[id].so_tag = tag;
|
||||
|
- else
|
||||
|
- tnfa->submatch_data[id].eo_tag = tag;
|
||||
|
- }
|
||||
|
+ tre_purge_regset(regset, tnfa, tag);
|
||||
|
} |
||||
|
else |
||||
|
{ |
||||
|
@@ -394,20 +405,7 @@
|
||||
|
minimal_tag = -1; |
||||
|
num_minimals++; |
||||
|
} |
||||
|
- /* Go through the regset and set submatch data for
|
||||
|
- submatches that are using this tag. */
|
||||
|
- for (i = 0; regset[i] >= 0; i++)
|
||||
|
- {
|
||||
|
- int id = regset[i] / 2;
|
||||
|
- int start = !(regset[i] % 2);
|
||||
|
- DPRINT((" Using tag %d for %s offset of "
|
||||
|
- "submatch %d\n", tag,
|
||||
|
- start ? "start" : "end", id));
|
||||
|
- if (start)
|
||||
|
- tnfa->submatch_data[id].so_tag = tag;
|
||||
|
- else
|
||||
|
- tnfa->submatch_data[id].eo_tag = tag;
|
||||
|
- }
|
||||
|
+ tre_purge_regset(regset, tnfa, tag);
|
||||
|
} |
||||
|
|
||||
|
DPRINT((" num_tags++\n")); |
||||
|
@@ -479,20 +477,7 @@
|
||||
|
minimal_tag = -1; |
||||
|
num_minimals++; |
||||
|
} |
||||
|
- /* Go through the regset and set submatch data for
|
||||
|
- submatches that are using this tag. */
|
||||
|
- for (i = 0; regset[i] >= 0; i++)
|
||||
|
- {
|
||||
|
- int id = regset[i] / 2;
|
||||
|
- int start = !(regset[i] % 2);
|
||||
|
- DPRINT((" Using tag %d for %s offset of "
|
||||
|
- "submatch %d\n", tag,
|
||||
|
- start ? "start" : "end", id));
|
||||
|
- if (start)
|
||||
|
- tnfa->submatch_data[id].so_tag = tag;
|
||||
|
- else
|
||||
|
- tnfa->submatch_data[id].eo_tag = tag;
|
||||
|
- }
|
||||
|
+ tre_purge_regset(regset, tnfa, tag);
|
||||
|
} |
||||
|
|
||||
|
DPRINT((" num_tags++\n")); |
||||
|
@@ -640,23 +625,7 @@
|
||||
|
} /* end while(tre_stack_num_objects(stack) > bottom) */ |
||||
|
|
||||
|
if (!first_pass) |
||||
|
- {
|
||||
|
- int i;
|
||||
|
- /* Go through the regset and set submatch data for
|
||||
|
- submatches that are using this tag. */
|
||||
|
- for (i = 0; regset[i] >= 0; i++)
|
||||
|
- {
|
||||
|
- int id = regset[i] / 2;
|
||||
|
- int start = !(regset[i] % 2);
|
||||
|
- DPRINT((" Using tag %d for %s offset of "
|
||||
|
- "submatch %d\n", num_tags,
|
||||
|
- start ? "start" : "end", id));
|
||||
|
- if (start)
|
||||
|
- tnfa->submatch_data[id].so_tag = num_tags;
|
||||
|
- else
|
||||
|
- tnfa->submatch_data[id].eo_tag = num_tags;
|
||||
|
- }
|
||||
|
- }
|
||||
|
+ tre_purge_regset(regset, tnfa, tag);
|
||||
|
|
||||
|
if (!first_pass && minimal_tag >= 0) |
||||
|
{ |
||||
|
|