mirror of the now-defunct rocklinux.org
You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

301 lines
11 KiB

  1. # --- ROCK-COPYRIGHT-NOTE-BEGIN ---
  2. #
  3. # This copyright note is auto-generated by ./scripts/Create-CopyPatch.
  4. # Please add additional copyright information _after_ the line containing
  5. # the ROCK-COPYRIGHT-NOTE-END tag. Otherwise it might get removed by
  6. # the ./scripts/Create-CopyPatch script. Do not edit this copyright text!
  7. #
  8. # ROCK Linux: rock-src/package/fake/xmltv/tv-grab-de-tvtoday-cvs.patch
  9. # ROCK Linux is Copyright (C) 1998 - 2004 Clifford Wolf
  10. #
  11. # This patch file is dual-licensed. It is available under the license the
  12. # patched project is licensed under, as long as it is an OpenSource license
  13. # as defined at http://www.opensource.org/ (e.g. BSD, X11) or under the terms
  14. # of the GNU General Public License as published by the Free Software
  15. # Foundation; either version 2 of the License, or (at your option) any later
  16. # version.
  17. #
  18. # --- ROCK-COPYRIGHT-NOTE-END ---
  19. --- ./grab/de_tvtoday/tv_grab_de_tvtoday.in.orig 2004-09-15 15:41:16.423279656 +0200
  20. +++ ./grab/de_tvtoday/tv_grab_de_tvtoday.in 2004-09-15 15:47:54.423774384 +0200
  21. @@ -17,7 +17,7 @@
  22. [--days N] [--offset N]
  23. [--quiet] [--slow] [--nosqueezeout]
  24. -tv_grab_de_tvtoday --list-channels
  25. +tv_grab_de_tvtoday --list-channels [--icons]
  26. =head1 DESCRIPTION
  27. @@ -62,6 +62,10 @@
  28. B<--list-channels> write output giving <channel> elements for every
  29. channel available (ignoring the config file), but no programmes.
  30. +B<--icons> fetch the URL of each channel logo along with the channel list.
  31. +Note that this takes a long time, since a web page has to be requested for
  32. +every channel.
  33. +
  34. B<--help> print a help message and exit.
  35. =head1 SEE ALSO
  36. @@ -83,7 +87,7 @@
  37. use warnings;
  38. use strict;
  39. use Date::Manip;
  40. -use XMLTV::Version '$Id: tv_grab_de_tvtoday.in,v 1.13 2004/05/09 17:49:11 epaepa Exp $ ';
  41. +use XMLTV::Version '$Id: tv_grab_de_tvtoday.in,v 1.19 2004/07/17 14:45:34 stesie Exp $ ';
  42. use Getopt::Long;
  43. use HTML::TreeBuilder;
  44. use HTML::Entities;
  45. @@ -101,7 +105,7 @@
  46. To grab data: $0 [--config-file FILE] [--output FILE]
  47. [--days N] [--offset N]
  48. [--quiet] [--slow] [--nosqueezeout]
  49. -Channel List: $0 --list-channels
  50. +Channel List: $0 --list-channels [--icons]
  51. END
  52. ;
  53. @@ -123,6 +127,7 @@
  54. sub squeeze_out_desc($$);
  55. sub refine_category_attr($$);
  56. sub get_channels();
  57. +sub get_icons();
  58. sub channel_id($);
  59. sub split_up_names($$);
  60. sub parse_date_data($);
  61. @@ -133,7 +138,7 @@
  62. sub refine_credits($);
  63. #-- Category-Matching RegExp
  64. -our constant $category_regexp = '^(.*?\s+)?((?:[\w�������-]+-?)?(?:[Aa]genten|[Cc]harts|[Dd]oku(?:mentar|mentation)?|Episoden|[Dd]rama|[Kk]rimi|[Kk]om�die|[Ll]iteratur|[Mm]agazin|[Mm]elodram|[Pp]ortr�t|[Rr]eportage|[Rr]eihe|[Ss]oap|[Ss]atire|[Ss]erie|[Ss]tudie|[Tt]alk|[Tt]hriller)-?(?:[Ff]ilm|[Mm]ovie|[Ss]how)?s?)([\s;,]+.*)?$';
  65. +our constant $category_regexp = '^(.*?\s+)?((?:[\w�������-]+-?)?(?:[Aa]genten|[Cc]harts|[Cc]omedy|[Dd]oku(?:mentar|mentation)?|Episoden|[Dd]rama|[Kk]rimi|[Kk]om�die|[Ll]iteratur|[Mm]agazin|[Mm]elodram|[Pp]ortr�t|[Rr]eportage|[Rr]eihe|[Ss]oap|[Ss]atire|[Ss]erie|[Ss]tudie|[Tt]alk|[Tt]hriller|Wunschclip)-?(?:[Ff]ilm|[Mm]ovie|[Ss]how)?s?)([\s;,]+.*)?$';
  66. #-- DEBUG FLUFF ...
  67. my $debug = 0;
  68. @@ -166,6 +171,7 @@
  69. my $opt_slow = 0;
  70. my $opt_nosqueeze = 0;
  71. my $opt_list_channels;
  72. +my $opt_icons = 0;
  73. my $opt_help;
  74. my $opt_share;
  75. @@ -179,6 +185,7 @@
  76. 'slow' => \$opt_slow,
  77. 'nosqueezeout' => \$opt_nosqueeze,
  78. 'list-channels' => \$opt_list_channels,
  79. + 'icons' => \$opt_icons,
  80. 'help' => \$opt_help,
  81. 'share=s' => \$opt_share,
  82. ) or usage(0);
  83. @@ -222,6 +229,11 @@
  84. #-- hey, we can't live without channel data, so let's get that now!
  85. my %channels = get_channels();
  86. +#-- if requested, get the channel logos (done here only in list-channels mode!)
  87. +my %icons;
  88. +%icons = get_icons() if $opt_icons && $opt_list_channels;
  89. +
  90. +
  91. # share/ directory for storing channel mapping files. This next line
  92. # is altered by processing through tv_grab_de_tvtoday.PL. But we can
  93. # use the current directory instead of share/tv_grab_de_tvtoday for
  94. @@ -331,8 +343,11 @@
  95. if ($mode eq 'list-channels') {
  96. foreach (keys %channels) {
  97. - $writer->write_channel({'id'=>channel_id($_),
  98. - 'display-name'=>[[$channels{$_}, $lang]]});
  99. + my %channel = ('id' => channel_id($_),
  100. + 'display-name' => [[$channels{$_}, $lang]]);
  101. + $channel{'icon'} = [{'src' => "http://www.tvtoday.de" . $icons{$_}}]
  102. + if(defined($icons{$_}));
  103. + $writer->write_channel(\%channel);
  104. }
  105. $writer->end();
  106. @@ -346,14 +361,10 @@
  107. die "No channels specified, run me with --configure flag\n" unless(scalar(@requests));
  108. +#-- Channels must be written after grabbing, so buffer the programme data until then
  109. +my @writebuffer;
  110. -#-- write out <channel> tags
  111. -foreach(@requests) {
  112. - $writer->write_channel({'id'=>channel_id($_),
  113. - 'display-name'=>[[$channels{$_}, $lang]]});
  114. -}
  115. -
  116. -#-- write out <programme> tags
  117. +#-- get <programme> tags
  118. my $numdays = $opt_days + $opt_offset - 1;
  119. my $bar = new Term::ProgressBar('grabbing', scalar(@requests) * $opt_days)
  120. if Have_bar && not $opt_quiet;
  121. @@ -365,6 +376,18 @@
  122. }
  123. }
  124. +#-- write out <channel> tags
  125. +foreach(@requests) {
  126. + my $id = channel_id($_);
  127. + my %channel = ('id' => $id,
  128. + 'display-name' => [[$channels{$_}, $lang]]);
  129. + $channel{'icon'} = [{'src' => "http://www.tvtoday.de" . $icons{$id}}]
  130. + if(defined($icons{$id}));
  131. + $writer->write_channel(\%channel);
  132. +}
  133. +
  134. +#-- write out <programme> tags
  135. +$writer->write_programme($_) foreach(@writebuffer);
  136. #-- hey, looks like we've finished ...
  137. $writer->end();
  138. @@ -466,7 +489,9 @@
  139. @el = $el[0]->content_list();
  140. - $_ = shift @el; #-- in this column there's only the logo of the tv station, ignore that
  141. + $_ = shift @el; #-- in this column there's the logo of the tv station
  142. + $icons{$grab->{'channel'}} = $_->look_down('_tag' => 'img')->attr('src')
  143. + unless(exists($icons{$grab->{'channel'}}));
  144. $_ = shift @el; #-- there we should have the time when our show begins ...
  145. die "unable to extract time-information from html code, content:\n", $_->as_text()
  146. @@ -490,6 +515,10 @@
  147. if (ref($span) eq "") {
  148. $span =~ s/\s*\([^\(]+\)\s*$//;
  149. + if ($span =~ s/\s*(\d+)\.\sTeil//gi) {
  150. + #- strip episode number from title field
  151. + $show{q(episode-num)} = [ [ $1, "onscreen" ] ];
  152. + }
  153. $show{title} = [[ $span, $lang ]];
  154. }
  155. elsif (ref($span) eq "HTML::Element" and $span->tag eq "a") {
  156. @@ -500,6 +529,10 @@
  157. my $title = ($tag->content_list())[0];
  158. $title =~ s/\s*\([^\(]+\)\s*$//;
  159. + if ($title =~ s/\s*(\d+)\.\sTeil//gi) {
  160. + #- strip episode number from title field
  161. + $show{q(episode-num)} = [ [ $1, "onscreen" ] ];
  162. + }
  163. $show{title} = [[ $title, $lang ]];
  164. }
  165. else { die }
  166. @@ -539,7 +572,36 @@
  167. #-- okay, commit that data now ...
  168. $show{channel} = $grab->{channel};
  169. - $writer->write_programme(\%show);
  170. +
  171. + #-- try to construct clumps, if necessary ...
  172. + if(defined($show{q(desc)})
  173. + && $show{q(desc)}->[0][0] =~ m/^anschl\.\s+(.*)/) {
  174. + my $clumpname = $1;
  175. +
  176. + delete $show{q(desc)};
  177. + $show{q(clumpidx)} = '0/2'; # first of two shows ...
  178. + push @writebuffer, \%show;
  179. +
  180. + my %newshow;
  181. + foreach(qw(start stop channel)) { $newshow{$_} = $show{$_}; }
  182. + $newshow{q(clumpidx)} = '1/2'; # second show ...
  183. +
  184. + #- $clumpname may contain an extra VPS start time ...
  185. + if($clumpname =~ s/\s+\(VPS ([012]?[0-9])\.([0-6][0-9])\)//) {
  186. + $newshow{q(vps-start)} = $newshow{q(start)};
  187. + substr($newshow{"vps-start"}, 8, 4) = sprintf("%02d%02d", $1, $2);
  188. + }
  189. +
  190. + warn("title of clumped show contains problematic chars, please take care")
  191. + if($clumpname =~ m/[,;:\*]/);
  192. +
  193. + $newshow{q(title)} = [[ $clumpname, $lang ]];
  194. + push @writebuffer, \%newshow;
  195. + }
  196. + else {
  197. + #-- common clumpless show, buffer it for output ...
  198. + push @writebuffer, \%show;
  199. + }
  200. last if($grab->{"lasttime"} >= 86400 && !$grab->{"lastday"});
  201. }
  202. @@ -698,9 +760,10 @@
  203. my $show = shift;
  204. my @newdesc;
  205. - #push(@newdesc, $show->{"desc"}->[0][0]) if($show->{"desc"});
  206. - if(my @parts = ($$desc =~ m/^\s*(\(([^\)]*)\))?\s+([^,;]+)(,\s+([^,;]+)\s+([12][09][0-9]{2}(?:[\/-][0-9]{2})?))?\s*(?:; (Buch\/Regie|R): ([^;]+))?(; D: (.+))?\s*$/)) {
  207. + # try to match <category>, <country> <year>; R: <names>; D: <names> construct
  208. + # where <country>/<year> or the [RD]: stuff may be missing ...
  209. + if(my @parts = ($$desc =~ m/^\s*(\(([^\)]*)\))?\s+([^,;]+)(,\s+([^,;]+)\s+([12][09][0-9]{2}(?:[\/-][0-9]{2})?))?\s*; (?:(?:; )?(Buch\/Regie|R): ([^;]+))?\s*((?:; )?D: (.+))?\s*$/)) {
  210. $$desc = "";
  211. #-- $parts[1] is the show title in English (doesn't have to be available)
  212. @@ -806,14 +869,6 @@
  213. next;
  214. }
  215. - if (s/\(VPS ([0-2][0-9])\.([0-5][0-9])\)//) {
  216. - # assume that vps begins on the same day as the actual show,
  217. - # thus simply overwrite the stored 'start' information
  218. - # might do trouble when daylight savings time begins/ends
  219. - $show->{"vps-start"} = $show->{"start"};
  220. - substr($show->{"vps-start"}, 8, 6) = "$1$200";
  221. - }
  222. -
  223. if (my ($type, $names) = m/^\s*(Reporter:|Moderation:|Kommentar:|Gast:|G�ste:|Mit|Film von)\s+(?!de[nm]\s+)(.*?)\s*$/) {
  224. $names =~ s/\s*u.a.\s*$//;
  225. $names =~ s/\([^\(\)]+\)//g; #-- remove all brackets, that further describe the person
  226. @@ -1002,10 +1057,45 @@
  227. +#-- get channel logos
  228. +sub get_icons() {
  229. + my %icons;
  230. + my $url="http://www.tvtoday.de/tv/programm/programm.php?ztag=0&sparte=alle&uhrzeit=Ax00&sender=";
  231. + my $chan;
  232. + my $tag;
  233. + my $addr;
  234. +
  235. + my $bar = new Term::ProgressBar('grabbing icons', scalar(keys(%channels)))
  236. + if Have_bar && not $opt_quiet;
  237. +
  238. + foreach (keys %channels) {
  239. + my $tb = new HTML::TreeBuilder();
  240. + $tb->parse(get_page($url.$_));
  241. + $tag = $tb->look_down('_tag' => 'img',
  242. + sub {
  243. + return ($_[0]->attr('src') =~ m/^\/tv\/programm\/bilder\/senderlogos\//);
  244. + });
  245. +
  246. + update $bar if Have_bar && not $opt_quiet;
  247. +
  248. + unless(ref($tag) eq "HTML::Element") {
  249. + $tb->delete;
  250. + next;
  251. + };
  252. +
  253. + $icons{$_} = $tag->attr('src');
  254. + $tb->delete;
  255. + }
  256. +
  257. + return %icons;
  258. +}
  259. +
  260. +
  261. +
  262. #-- get channel listing
  263. sub get_channels() {
  264. my %channels;
  265. - my $url="http://www.tvtoday.de/tv/programm/programm.php?ztag=0&sparte=alle&uhrzeit=Ax00&sender=ZDF";
  266. + my $url="http://www.tvtoday.de/tv/programm/programm.php?ztag=0&sparte=alle&uhrzeit=Ax00&sender=alle";
  267. my $tb=new HTML::TreeBuilder();
  268. $tb->parse(get_page($url));