mirror of the now-defunct rocklinux.org
You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

126 lines
4.5 KiB

  1. --- ./grab/de_tvtoday/tv_grab_de_tvtoday.in 2006/04/21 17:17:21 1.42
  2. +++ ./grab/de_tvtoday/tv_grab_de_tvtoday.in 2006/05/25 17:10:18 1.45 HEAD
  3. @@ -99,7 +99,7 @@
  4. use warnings;
  5. use strict;
  6. -use XMLTV::Version '$Id: tv_grab_de_tvtoday.in,v 1.42 2006/04/21 17:17:21 stesie Exp $ ';
  7. +use XMLTV::Version '$Id: tv_grab_de_tvtoday.in,v 1.45 2006/05/25 17:10:18 stesie Exp $ ';
  8. use XMLTV::Capabilities qw/baseline manualconfig cache share/;
  9. use XMLTV::Description 'Germany (www.tvtoday.de)';
  10. use Date::Manip;
  11. @@ -138,6 +138,7 @@
  12. else {
  13. *t = \&Log::TraceMessages::t;
  14. *d = \&Log::TraceMessages::d;
  15. + #$Log::TraceMessages::On = 1;
  16. }
  17. }
  18. @@ -502,9 +503,10 @@
  19. }
  20. #-- extract date of grabbed data from retrieved webpage ...
  21. - $_ = $page->look_down('_tag' => 'td', 'class' => 'navigator-hhead-large');
  22. + $_ = $page->look_down('_tag' => 'span', 'class' => 'text-weiss');
  23. die("cannot find date on requested page")
  24. unless($_->as_text() =~ m/([1-3]?[0-9])\.(1?[0-9])\.(20[0-9]{2})/);
  25. + t "extracted date: $3-$2-$1";
  26. $day = ParseDate("$3-$2-$1 00:00:00");
  27. #-- well, now let's scan the table for programme data
  28. @@ -573,6 +575,7 @@
  29. $show{q(episode-num)} = [ [ $1, "onscreen" ] ];
  30. }
  31. + t "show title: $span";
  32. $show{title} = [[ $span, $lang ]];
  33. }
  34. elsif (ref($span) eq "HTML::Element" and $span->tag eq "a") {
  35. @@ -583,7 +586,7 @@
  36. my $title = ($tag->content_list())[0];
  37. - $title = convert_cp1252_chars(\$title);
  38. + convert_cp1252_chars(\$title);
  39. $title =~ s/\s*\([^\(]+\)\s*$//;
  40. if ($title =~ s/\s*(\d+)\.\sTeil//gi) {
  41. @@ -836,6 +839,7 @@
  42. # try to match <category>, <country> <year>; R: <names>; D: <names> construct
  43. # where <country>/<year> or the [RD]: stuff may be missing ...
  44. if(my @parts = ($$desc =~ m/^\s*(\(([^\)]*)\))?\s+([^,;0-9]+)(,?\s+([^,;]+)\s+([12][09][0-9]{2}(?:[\/-][0-9]{2})?))?\s*; (?:(?:; )?(Buch\/Regie|R): ([^;]+))?\s*((?:; )?D: (.+))?\s*$/)) {
  45. + t "split rule: <category>, <country> <year> ...";
  46. $$desc = "";
  47. #-- $parts[1] is the show title in English (doesn't have to be available)
  48. @@ -887,9 +891,14 @@
  49. }
  50. }
  51. else {
  52. + t "split rule: dot splitting";
  53. my @data = split "·", $$desc;
  54. s/(^\s|\s$)//g foreach(@data); #CHG#
  55. + for(0 .. (scalar(@data) - 1)) {
  56. + t "dot-split part $_: " . $data[$_];
  57. + }
  58. +
  59. if(scalar(@data) == 3
  60. && not($data[1] =~ m/[\wäöüßÄÖÜ]+:/) #- FIX false positive: tvtoday.de seems to publish "guests: <names>" here some (rare) times :-(
  61. && $data[2] =~ m/^Mit (.*?)$/) {
  62. @@ -945,15 +954,22 @@
  63. next;
  64. }
  65. - if (my ($cat, $rest1, $names, $guests, $rest2) = m/^([^,]+?)((?:\s+-\s+..+?)*) - Moderation: (.+?) - Gäste: (..+?)(?:\s+-\s+(.+))?$/) {
  66. + if (my ($nocat, $cat, $rest1, $names, $guests, $rest2) = m/^(([^,.%^&*();]+?)((?:\s+-\s+..+?)*)|.+) - Moderation: (.+?) - Gäste: (..+?)(?:\s+-\s+(.+))?$/) {
  67. my @data = split_up_names($names, $show);
  68. push @{$show->{"credits"}{"presenter"}}, @data;
  69. my @guest_data = split_up_names($guests, $show);
  70. push @{$show->{"credits"}{"guest"}}, @guest_data;
  71. - $show->{"category"} = [[ $cat, $lang ]];
  72. -
  73. - warn "misdetected category: $cat"
  74. - if($cat =~ m/\d{4}/);
  75. +
  76. + if(defined($cat)) {
  77. + $show->{"category"} = [[ $cat, $lang ]];
  78. +
  79. + warn "misdetected category: $cat"
  80. + if($cat =~ m/\d{4}/);
  81. + }
  82. + else {
  83. + t "no-cat match: $nocat";
  84. + $rest1 = $nocat;
  85. + }
  86. my @rest;
  87. foreach(defined($rest1) ? split(m/\s+-\s+/, $rest1) : undef, $rest2) {
  88. @@ -963,14 +979,21 @@
  89. next unless length($_);
  90. }
  91. - if (my ($cat, $rest1, $names, $rest2) = m/^([^,]+?)((?:\s+-\s+..+?)*) - Moderation: (.+?)(?:\s+-\s+(.+))?$/) {
  92. + if (my ($nocat, $cat, $rest1, $names, $rest2) = m/^(([^,]+?)((?:\s+-\s+..+?)*)|.+) - Moderation: (.+?)(?:\s+-\s+(.+))?$/) {
  93. my @data = split_up_names($names, $show);
  94. push @{$show->{"credits"}{"presenter"}}, @data;
  95. - $show->{"category"} = [[ $cat, $lang ]];
  96. - warn "misdetected category: $cat"
  97. - if($cat =~ m/\d{4}/);
  98. -
  99. + if(defined($cat)) {
  100. + $show->{"category"} = [[ $cat, $lang ]];
  101. +
  102. + warn "misdetected category: $cat"
  103. + if($cat =~ m/\d{4}/);
  104. + }
  105. + else {
  106. + t "no-cat match: $nocat";
  107. + $rest1 = $nocat;
  108. + }
  109. +
  110. my @rest;
  111. foreach(defined($rest1) ? split(m/\s+-\s+/, $rest1) : undef, $rest2) {
  112. push @rest, $_ if(defined($_) && length($_));