Browse Source

Applied pending tre bugfix patches now available in the development repo

early
Christian Wiese 17 years ago
parent
commit
b2f11c5b77
3 changed files with 211 additions and 0 deletions
  1. +43
    -0
      textproc/tre/tre-0.7.5-api-doc.patch
  2. +31
    -0
      textproc/tre/tre-0.7.5-match.patch
  3. +137
    -0
      textproc/tre/tre-0.7.5-tre_compile.patch

+ 43
- 0
textproc/tre/tre-0.7.5-api-doc.patch

@ -0,0 +1,43 @@
Sun Jan 28 21:08:45 EET 2007 Ville Laurikari <vl@iki.fi>
* Fixed regoff_t documentation for wide characters.
The documentation erroneously claimed that offsets are always given in
bytes (they are bytes in byte and multibyte strings, but wchar_t
offsets in wchar_t strings).
Thanks to Gregory Sharp for pointing this out.
diff -rN -u old-stable/doc/tre-api.html new-stable/doc/tre-api.html
--- old-stable/doc/tre-api.html 2007-11-04 20:25:00.000000000 +0200
+++ new-stable/doc/tre-api.html 2007-11-04 20:25:00.000000000 +0200
@@ -323,22 +323,21 @@
<dl>
<dt><tt><font class="type">regoff_t</font> <font
class="arg">rm_so</font></tt></dt>
-<dd>Byte offset from start of <tt><font class="arg">string</font></tt>
-to start of substring. </dd>
+<dd>Offset from start of <tt><font class="arg">string</font></tt> to start of
+substring. </dd>
<dt><tt><font class="type">regoff_t</font> <font
class="arg">rm_eo</font></tt></dt>
-<dd>Byte offset from start of <tt><font class="arg">string</font></tt>
-to the first character after the substring. </dd>
+<dd>Offset from start of <tt><font class="arg">string</font></tt> to the first
+character after the substring. </dd>
</dl>
</blockquote>
<p>
-The length of a submatch in bytes can be computed by subtracting
-<code>rm_eo</code> and <code>rm_so</code>.
-If a parenthesized subexpression did not participate in a match, the
-<code>rm_so</code> and <code>rm_eo</code> fields for the corresponding
-<code>pmatch</code> element are set to <code>-1</code>.
-When a multibyte character set is in effect, the submatch offsets are
+The length of a submatch can be computed by subtracting <code>rm_so</code> from
+<code>rm_eo</code>. If a parenthesized subexpression did not participate in a
+match, the <code>rm_so</code> and <code>rm_eo</code> fields for the
+corresponding <code>pmatch</code> element are set to <code>-1</code>. Note
+that when a multibyte character set is in effect, the submatch offsets are
given as byte offsets, not character offsets.
</p>

+ 31
- 0
textproc/tre/tre-0.7.5-match.patch

@ -0,0 +1,31 @@
Sun Nov 4 18:47:56 EET 2007 Ville Laurikari <vl@iki.fi>
* Fixed a bug in \<.
\< always matched at the beginning of the string. Thanks to Shmuel
Zeigerman for the bug report.
See http://laurikari.net/pipermail/tre-general/2007-February/000128.html
diff -rN -u old-stable/lib/tre-match-utils.h new-stable/lib/tre-match-utils.h
--- old-stable/lib/tre-match-utils.h 2007-11-04 20:30:23.000000000 +0200
+++ new-stable/lib/tre-match-utils.h 2007-11-04 20:30:23.000000000 +0200
@@ -161,7 +161,7 @@
&& (next_c != L'\0' || reg_noteol) \
&& (next_c != L'\n' || !reg_newline)) \
|| ((assertions & ASSERT_AT_BOW) \
- && (pos > 0 && (IS_WORD_CHAR(prev_c) || !IS_WORD_CHAR(next_c)))) \
+ && (IS_WORD_CHAR(prev_c) || !IS_WORD_CHAR(next_c))) \
|| ((assertions & ASSERT_AT_EOW) \
&& (!IS_WORD_CHAR(prev_c) || IS_WORD_CHAR(next_c))) \
|| ((assertions & ASSERT_AT_WB) \
diff -rN -u old-stable/tests/retest.c new-stable/tests/retest.c
--- old-stable/tests/retest.c 2007-11-04 20:30:23.000000000 +0200
+++ new-stable/tests/retest.c 2007-11-04 20:30:23.000000000 +0200
@@ -1121,6 +1121,8 @@
test_exec("aax xxa", 0, REG_OK, 2, 3, END);
test_comp("\\Bx\\b", REG_EXTENDED, 0);
test_exec("aax xxx", 0, REG_OK, 2, 3, END);
+ test_comp("\\<.", REG_EXTENDED, 0);
+ test_exec(";xaa", 0, REG_OK, 1, 2, END);
/* Shorthands for character classes. */
test_comp("\\w+", REG_EXTENDED, 0);

+ 137
- 0
textproc/tre/tre-0.7.5-tre_compile.patch

@ -0,0 +1,137 @@
Fri Mar 16 19:18:02 EET 2007 Ville Laurikari <vl@iki.fi>
* Refactoring.
diff -rN -u old-stable/lib/tre-compile.c new-stable/lib/tre-compile.c
--- old-stable/lib/tre-compile.c 2007-11-04 20:27:45.000000000 +0200
+++ new-stable/lib/tre-compile.c 2007-11-04 20:27:45.000000000 +0200
@@ -1,7 +1,7 @@
/*
tre-compile.c - TRE regex compiler
- Copyright (c) 2001-2006 Ville Laurikari <vl@iki.fi>
+ Copyright (c) 2001-2007 Ville Laurikari <vl@iki.fi>
This library is free software; you can redistribute it and/or
modify it under the terms of the GNU Lesser General Public
@@ -127,6 +127,30 @@
int next_tag;
} tre_tag_states_t;
+
+/* Go through `regset' and set submatch data for submatches that are
+ using this tag, then empty `regset' by storing its -1 terminator at [0]. */
+static void
+tre_purge_regset(int *regset, tre_tnfa_t *tnfa, int tag)
+{
+ int i;
+
+ for (i = 0; regset[i] >= 0; i++)
+ {
+ int id = regset[i] / 2;
+ int start = !(regset[i] % 2);
+ DPRINT((" Using tag %d for %s offset of "
+ "submatch %d\n", tag,
+ start ? "start" : "end", id));
+ if (start)
+ tnfa->submatch_data[id].so_tag = tag;
+ else
+ tnfa->submatch_data[id].eo_tag = tag;
+ }
+ regset[0] = -1;
+}
+
+
/* Adds tags to appropriate locations in the parse tree in `tree', so that
subexpressions marked for submatch addressing can be traced. */
static reg_errcode_t
@@ -281,20 +305,7 @@
minimal_tag = -1;
num_minimals++;
}
- /* Go through the regset and set submatch data for
- submatches that are using this tag. */
- for (i = 0; regset[i] >= 0; i++)
- {
- int id = regset[i] / 2;
- int start = !(regset[i] % 2);
- DPRINT((" Using tag %d for %s offset of "
- "submatch %d\n", tag,
- start ? "start" : "end", id));
- if (start)
- tnfa->submatch_data[id].so_tag = tag;
- else
- tnfa->submatch_data[id].eo_tag = tag;
- }
+ tre_purge_regset(regset, tnfa, tag);
}
else
{
@@ -394,20 +405,7 @@
minimal_tag = -1;
num_minimals++;
}
- /* Go through the regset and set submatch data for
- submatches that are using this tag. */
- for (i = 0; regset[i] >= 0; i++)
- {
- int id = regset[i] / 2;
- int start = !(regset[i] % 2);
- DPRINT((" Using tag %d for %s offset of "
- "submatch %d\n", tag,
- start ? "start" : "end", id));
- if (start)
- tnfa->submatch_data[id].so_tag = tag;
- else
- tnfa->submatch_data[id].eo_tag = tag;
- }
+ tre_purge_regset(regset, tnfa, tag);
}
DPRINT((" num_tags++\n"));
@@ -479,20 +477,7 @@
minimal_tag = -1;
num_minimals++;
}
- /* Go through the regset and set submatch data for
- submatches that are using this tag. */
- for (i = 0; regset[i] >= 0; i++)
- {
- int id = regset[i] / 2;
- int start = !(regset[i] % 2);
- DPRINT((" Using tag %d for %s offset of "
- "submatch %d\n", tag,
- start ? "start" : "end", id));
- if (start)
- tnfa->submatch_data[id].so_tag = tag;
- else
- tnfa->submatch_data[id].eo_tag = tag;
- }
+ tre_purge_regset(regset, tnfa, tag);
}
DPRINT((" num_tags++\n"));
@@ -640,23 +625,7 @@
} /* end while(tre_stack_num_objects(stack) > bottom) */
if (!first_pass)
- {
- int i;
- /* Go through the regset and set submatch data for
- submatches that are using this tag. */
- for (i = 0; regset[i] >= 0; i++)
- {
- int id = regset[i] / 2;
- int start = !(regset[i] % 2);
- DPRINT((" Using tag %d for %s offset of "
- "submatch %d\n", num_tags,
- start ? "start" : "end", id));
- if (start)
- tnfa->submatch_data[id].so_tag = num_tags;
- else
- tnfa->submatch_data[id].eo_tag = num_tags;
- }
- }
+ tre_purge_regset(regset, tnfa, tag);
if (!first_pass && minimal_tag >= 0)
{

Loading…
Cancel
Save