Applied pending tre bugfix patches now available in the development repo

17 years ago · b2f11c5b77
3 changed files with 211 additions and 0 deletions
--- a/textproc/tre/tre-0.7.5-api-doc.patch
+++ b/textproc/tre/tre-0.7.5-api-doc.patch
@@ -0,0 +1,43 @@
 Sun Jan 28 21:08:45 EET 2007  Ville Laurikari <vl@iki.fi>
  * Fixed regoff_t documentation for wide characters.
  The documentation erroneously claimed that offsets are always given in
  bytes (they are bytes in byte and multibyte strings, but wchar_t
  offsets in wchar_t strings).
  Thanks to Gregory Sharp for pointing this out.
 diff -rN -u old-stable/doc/tre-api.html new-stable/doc/tre-api.html
 --- old-stable/doc/tre-api.html	2007-11-04 20:25:00.000000000 +0200
 +++ new-stable/doc/tre-api.html	2007-11-04 20:25:00.000000000 +0200
@@ -323,22 +323,21 @@
 <dl>
 <dt><tt><font class="type">regoff_t</font> <font
 class="arg">rm_so</font></tt></dt>
 -<dd>Byte offset from start of <tt><font class="arg">string</font></tt>
 -to start of substring.  </dd>
 +<dd>Offset from start of <tt><font class="arg">string</font></tt> to start of
 +substring.  </dd>
 <dt><tt><font class="type">regoff_t</font> <font
 class="arg">rm_eo</font></tt></dt>
 -<dd>Byte offset from start of <tt><font class="arg">string</font></tt>
 -to the first character after the substring.  </dd>
 +<dd>Offset from start of <tt><font class="arg">string</font></tt> to the first
 +character after the substring.  </dd>
 </dl>
 </blockquote>
 <p>
 -The length of a submatch in bytes can be computed by subtracting
 -<code>rm_eo</code> and <code>rm_so</code>.
 -If a parenthesized subexpression did not participate in a match, the
 -<code>rm_so</code> and <code>rm_eo</code> fields for the corresponding
 -<code>pmatch</code> element are set to <code>-1</code>.
 -When a multibyte character set is in effect, the submatch offsets are
 +The length of a submatch can be computed by subtracting <code>rm_eo</code> and
 +<code>rm_so</code>.  If a parenthesized subexpression did not participate in a
 +match, the <code>rm_so</code> and <code>rm_eo</code> fields for the
 +corresponding <code>pmatch</code> element are set to <code>-1</code>.  Note
 +that when a multibyte character set is in effect, the submatch offsets are
 given as byte offsets, not character offsets.
 </p>
--- a/textproc/tre/tre-0.7.5-match.patch
+++ b/textproc/tre/tre-0.7.5-match.patch
@@ -0,0 +1,31 @@
 Sun Nov  4 18:47:56 EET 2007  Ville Laurikari <vl@iki.fi>
  * Fixed a bug in \<.
  \< always matched at the beginning of the string.  Thanks to Shmuel
  Zeigerman for the bug report.
  See http://laurikari.net/pipermail/tre-general/2007-February/000128.html
 diff -rN -u old-stable/lib/tre-match-utils.h new-stable/lib/tre-match-utils.h
 --- old-stable/lib/tre-match-utils.h	2007-11-04 20:30:23.000000000 +0200
 +++ new-stable/lib/tre-match-utils.h	2007-11-04 20:30:23.000000000 +0200
@@ -161,7 +161,7 @@
        && (next_c != L'\0' || reg_noteol)				      \
        && (next_c != L'\n' || !reg_newline))				      \
    || ((assertions & ASSERT_AT_BOW)					      \
 -       && (pos > 0 && (IS_WORD_CHAR(prev_c) || !IS_WORD_CHAR(next_c))))	      \
 +       && (IS_WORD_CHAR(prev_c) || !IS_WORD_CHAR(next_c)))		      \
    || ((assertions & ASSERT_AT_EOW)					      \
        && (!IS_WORD_CHAR(prev_c) || IS_WORD_CHAR(next_c)))		      \
    || ((assertions & ASSERT_AT_WB)					      \
 diff -rN -u old-stable/tests/retest.c new-stable/tests/retest.c
 --- old-stable/tests/retest.c	2007-11-04 20:30:23.000000000 +0200
 +++ new-stable/tests/retest.c	2007-11-04 20:30:23.000000000 +0200
@@ -1121,6 +1121,8 @@
   test_exec("aax xxa", 0, REG_OK, 2, 3, END);
   test_comp("\\Bx\\b", REG_EXTENDED, 0);
   test_exec("aax xxx", 0, REG_OK, 2, 3, END);
 +  test_comp("\\<.", REG_EXTENDED, 0);
 +  test_exec(";xaa", 0, REG_OK, 1, 2, END);
   /* Shorthands for character classes. */
   test_comp("\\w+", REG_EXTENDED, 0);
--- a/textproc/tre/tre-0.7.5-tre_compile.patch
+++ b/textproc/tre/tre-0.7.5-tre_compile.patch
@@ -0,0 +1,137 @@
 Fri Mar 16 19:18:02 EET 2007  Ville Laurikari <vl@iki.fi>
  * Refactoring.
 diff -rN -u old-stable/lib/tre-compile.c new-stable/lib/tre-compile.c
 --- old-stable/lib/tre-compile.c	2007-11-04 20:27:45.000000000 +0200
 +++ new-stable/lib/tre-compile.c	2007-11-04 20:27:45.000000000 +0200
@@ -1,7 +1,7 @@
 /*
   tre-compile.c - TRE regex compiler
 -  Copyright (c) 2001-2006 Ville Laurikari <vl@iki.fi>
 +  Copyright (c) 2001-2007 Ville Laurikari <vl@iki.fi>
   This library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Lesser General Public
@@ -127,6 +127,30 @@
   int next_tag;
 } tre_tag_states_t;
 +
 +/* Go through `regset' and set submatch data for submatches that are
 +   using this tag. */
 +static void
 +tre_purge_regset(int *regset, tre_tnfa_t *tnfa, int tag)
 +{
 +  int i;
 +
 +  for (i = 0; regset[i] >= 0; i++)
 +    {
 +      int id = regset[i] / 2;
 +      int start = !(regset[i] % 2);
 +      DPRINT(("  Using tag %d for %s offset of "
 +	      "submatch %d\n", tag,
 +	      start ? "start" : "end", id));
 +      if (start)
 +	tnfa->submatch_data[id].so_tag = tag;
 +      else
 +	tnfa->submatch_data[id].eo_tag = tag;
 +    }
 +  regset[0] = -1;
 +}
 +
 +
 /* Adds tags to appropriate locations in the parse tree in `tree', so that
    subexpressions marked for submatch addressing can be traced. */
 static reg_errcode_t
@@ -281,20 +305,7 @@
 				minimal_tag = -1;
 				num_minimals++;
 			      }
 -			    /* Go through the regset and set submatch data for
 -			       submatches that are using this tag. */
 -			    for (i = 0; regset[i] >= 0; i++)
 -			      {
 -				int id = regset[i] / 2;
 -				int start = !(regset[i] % 2);
 -				DPRINT(("  Using tag %d for %s offset of "
 -					"submatch %d\n", tag,
 -					start ? "start" : "end", id));
 -				if (start)
 -				  tnfa->submatch_data[id].so_tag = tag;
 -				else
 -				  tnfa->submatch_data[id].eo_tag = tag;
 -			      }
 +			    tre_purge_regset(regset, tnfa, tag);
 			  }
 			else
 			  {
@@ -394,20 +405,7 @@
 			    minimal_tag = -1;
 			    num_minimals++;
 			  }
 -			/* Go through the regset and set submatch data for
 -			   submatches that are using this tag. */
 -			for (i = 0; regset[i] >= 0; i++)
 -			  {
 -			    int id = regset[i] / 2;
 -			    int start = !(regset[i] % 2);
 -			    DPRINT(("  Using tag %d for %s offset of "
 -				    "submatch %d\n", tag,
 -				    start ? "start" : "end", id));
 -			    if (start)
 -			      tnfa->submatch_data[id].so_tag = tag;
 -			    else
 -			      tnfa->submatch_data[id].eo_tag = tag;
 -			  }
 +			tre_purge_regset(regset, tnfa, tag);
 		      }
 		    DPRINT(("  num_tags++\n"));
@@ -479,20 +477,7 @@
 			    minimal_tag = -1;
 			    num_minimals++;
 			  }
 -			/* Go through the regset and set submatch data for
 -			   submatches that are using this tag. */
 -			for (i = 0; regset[i] >= 0; i++)
 -			  {
 -			    int id = regset[i] / 2;
 -			    int start = !(regset[i] % 2);
 -			    DPRINT(("  Using tag %d for %s offset of "
 -				    "submatch %d\n", tag,
 -				    start ? "start" : "end", id));
 -			    if (start)
 -			      tnfa->submatch_data[id].so_tag = tag;
 -			    else
 -			      tnfa->submatch_data[id].eo_tag = tag;
 -			  }
 +			tre_purge_regset(regset, tnfa, tag);
 		      }
 		    DPRINT(("  num_tags++\n"));
@@ -640,23 +625,7 @@
     } /* end while(tre_stack_num_objects(stack) > bottom) */
   if (!first_pass)
 -    {
 -      int i;
 -      /* Go through the regset and set submatch data for
 -	 submatches that are using this tag. */
 -      for (i = 0; regset[i] >= 0; i++)
 -	{
 -	  int id = regset[i] / 2;
 -	  int start = !(regset[i] % 2);
 -	  DPRINT(("  Using tag %d for %s offset of "
 -		  "submatch %d\n", num_tags,
 -		  start ? "start" : "end", id));
 -	  if (start)
 -	    tnfa->submatch_data[id].so_tag = num_tags;
 -	  else
 -	    tnfa->submatch_data[id].eo_tag = num_tags;
 -	}
 -    }
 +    tre_purge_regset(regset, tnfa, tag);
   if (!first_pass && minimal_tag >= 0)
     {