From eb9c84e96236a62cd47b3565d6071aff959b22f7 Mon Sep 17 00:00:00 2001 From: fake Date: Fri, 24 Sep 2004 13:24:40 +0000 Subject: [PATCH] fake: xmltv: add a patch updating tv_grab_de_tvtoday with some changes from xmltv's cvs. this makes it usable with mythtv. [2004091516421628511] (https://www.rocklinux.net/submaster) git-svn-id: http://www.rocklinux.org/svn/rock-linux/trunk@4377 c5f82cb5-29bc-0310-9cd0-bff59a50e3bc --- .../fake/xmltv/tv-grab-de-tvtoday-cvs.patch | 282 ++++++++++++++++++ 1 file changed, 282 insertions(+) create mode 100644 package/fake/xmltv/tv-grab-de-tvtoday-cvs.patch diff --git a/package/fake/xmltv/tv-grab-de-tvtoday-cvs.patch b/package/fake/xmltv/tv-grab-de-tvtoday-cvs.patch new file mode 100644 index 000000000..584d5c13f --- /dev/null +++ b/package/fake/xmltv/tv-grab-de-tvtoday-cvs.patch @@ -0,0 +1,282 @@ +--- ./grab/de_tvtoday/tv_grab_de_tvtoday.in.orig 2004-09-15 15:41:16.423279656 +0200 ++++ ./grab/de_tvtoday/tv_grab_de_tvtoday.in 2004-09-15 15:47:54.423774384 +0200 +@@ -17,7 +17,7 @@ + [--days N] [--offset N] + [--quiet] [--slow] [--nosqueezeout] + +-tv_grab_de_tvtoday --list-channels ++tv_grab_de_tvtoday --list-channels [--icons] + + =head1 DESCRIPTION + +@@ -62,6 +62,10 @@ + B<--list-channels> write output giving elements for every + channel available (ignoring the config file), but no programmes. + ++B<--icons> get the URL for channel-logos together with the channel-list. ++Mind that this takes a long time, since a webpage has to be requested for ++every channel. ++ + B<--help> print a help message and exit. + + =head1 SEE ALSO +@@ -83,7 +87,7 @@ + use warnings; + use strict; + use Date::Manip; +-use XMLTV::Version '$Id: tv_grab_de_tvtoday.in,v 1.13 2004/05/09 17:49:11 epaepa Exp $ '; ++use XMLTV::Version '$Id: tv_grab_de_tvtoday.in,v 1.19 2004/07/17 14:45:34 stesie Exp $ '; + use Getopt::Long; + use HTML::TreeBuilder; + use HTML::Entities; +@@ -101,7 +105,7 @@ + To grab data: $0 [--config-file FILE] [--output FILE] + [--days N] [--offset N] + [--quiet] [--slow] [--nosqueezeout] +-Channel List: $0 --list-channels ++Channel List: $0 --list-channels [--icons] + END + ; + +@@ -123,6 +127,7 @@ + sub squeeze_out_desc($$); + sub refine_category_attr($$); + sub get_channels(); ++sub get_icons(); + sub channel_id($); + sub split_up_names($$); + sub parse_date_data($); +@@ -133,7 +138,7 @@ + sub refine_credits($); + + #-- Category-Matching RegExp +-our constant $category_regexp = '^(.*?\s+)?((?:[\wäöüßÄÖÜ-]+-?)?(?:[Aa]genten|[Cc]harts|[Dd]oku(?:mentar|mentation)?|Episoden|[Dd]rama|[Kk]rimi|[Kk]omödie|[Ll]iteratur|[Mm]agazin|[Mm]elodram|[Pp]orträt|[Rr]eportage|[Rr]eihe|[Ss]oap|[Ss]atire|[Ss]erie|[Ss]tudie|[Tt]alk|[Tt]hriller)-?(?:[Ff]ilm|[Mm]ovie|[Ss]how)?s?)([\s;,]+.*)?$'; ++our constant $category_regexp = '^(.*?\s+)?((?:[\wäöüßÄÖÜ-]+-?)?(?:[Aa]genten|[Cc]harts|[Cc]omedy|[Dd]oku(?:mentar|mentation)?|Episoden|[Dd]rama|[Kk]rimi|[Kk]omödie|[Ll]iteratur|[Mm]agazin|[Mm]elodram|[Pp]orträt|[Rr]eportage|[Rr]eihe|[Ss]oap|[Ss]atire|[Ss]erie|[Ss]tudie|[Tt]alk|[Tt]hriller|Wunschclip)-?(?:[Ff]ilm|[Mm]ovie|[Ss]how)?s?)([\s;,]+.*)?$'; + + #-- DEBUG FLUFF ... + my $debug = 0; +@@ -166,6 +171,7 @@ + my $opt_slow = 0; + my $opt_nosqueeze = 0; + my $opt_list_channels; ++my $opt_icons = 0; + my $opt_help; + my $opt_share; + +@@ -179,6 +185,7 @@ + 'slow' => \$opt_slow, + 'nosqueezeout' => \$opt_nosqueeze, + 'list-channels' => \$opt_list_channels, ++ 'icons' => \$opt_icons, + 'help' => \$opt_help, + 'share=s' => \$opt_share, + ) or usage(0); +@@ -222,6 +229,11 @@ + #-- hey, we can't live without channel data, so let's get that now! + my %channels = get_channels(); + ++#-- if wanted, get the channel logos (only in list-channels-mode done here!) ++my %icons; ++%icons = get_icons() if $opt_icons && $opt_list_channels; ++ ++ + # share/ directory for storing channel mapping files. This next line + # is altered by processing through tv_grab_de_tvtoday.PL. But we can + # use the current directory instead of share/tv_grab_de_tvtoday for +@@ -331,8 +343,11 @@ + + if ($mode eq 'list-channels') { + foreach (keys %channels) { +- $writer->write_channel({'id'=>channel_id($_), +- 'display-name'=>[[$channels{$_}, $lang]]}); ++ my %channel = ('id' => channel_id($_), ++ 'display-name' => [[$channels{$_}, $lang]]); ++ $channel{'icon'} = [{'src' => "http://www.tvtoday.de" . $icons{$_}}] ++ if(defined($icons{$_})); ++ $writer->write_channel(\%channel); + } + + $writer->end(); +@@ -346,14 +361,10 @@ + die "No channels specified, run me with --configure flag\n" unless(scalar(@requests)); + + ++#-- We need to wait with writing the channels, therefore buffer the program-infos ++my @writebuffer; + +-#-- write out tags +-foreach(@requests) { +- $writer->write_channel({'id'=>channel_id($_), +- 'display-name'=>[[$channels{$_}, $lang]]}); +-} +- +-#-- write out tags ++#-- get tags + my $numdays = $opt_days + $opt_offset - 1; + my $bar = new Term::ProgressBar('grabbing', scalar(@requests) * $opt_days) + if Have_bar && not $opt_quiet; +@@ -365,6 +376,18 @@ + } + } + ++#-- write out tags ++foreach(@requests) { ++ my $id = channel_id($_); ++ my %channel = ('id' => $id, ++ 'display-name' => [[$channels{$_}, $lang]]); ++ $channel{'icon'} = [{'src' => "http://www.tvtoday.de" . $icons{$id}}] ++ if(defined($icons{$id})); ++ $writer->write_channel(\%channel); ++} ++ ++#-- write out tags ++$writer->write_programme($_) foreach(@writebuffer); + + #-- hey, looks like we've finished ... + $writer->end(); +@@ -466,7 +489,9 @@ + + @el = $el[0]->content_list(); + +- $_ = shift @el; #-- in this column there's only the logo of the tv station, ignore that ++ $_ = shift @el; #-- in this column there's the logo of the tv station ++ $icons{$grab->{'channel'}} = $_->look_down('_tag' => 'img')->attr('src') ++ unless(exists($icons{$grab->{'channel'}})); + + $_ = shift @el; #-- there we should have the time when our show begins ... + die "unable to extract time-information from html code, content:\n", $_->as_text() +@@ -490,6 +515,10 @@ + + if (ref($span) eq "") { + $span =~ s/\s*\([^\(]+\)\s*$//; ++ if ($span =~ s/\s*(\d+)\.\sTeil//gi) { ++ #- strip episode number from title field ++ $show{q(episode-num)} = [ [ $1, "onscreen" ] ]; ++ } + $show{title} = [[ $span, $lang ]]; + } + elsif (ref($span) eq "HTML::Element" and $span->tag eq "a") { +@@ -500,6 +529,10 @@ + + my $title = ($tag->content_list())[0]; + $title =~ s/\s*\([^\(]+\)\s*$//; ++ if ($title =~ s/\s*(\d+)\.\sTeil//gi) { ++ #- strip episode number from title field ++ $show{q(episode-num)} = [ [ $1, "onscreen" ] ]; ++ } + $show{title} = [[ $title, $lang ]]; + } + else { die } +@@ -539,7 +572,36 @@ + + #-- okay, commit that data now ... + $show{channel} = $grab->{channel}; +- $writer->write_programme(\%show); ++ ++ #-- try to construct clumps, if necessary ... ++ if(defined($show{q(desc)}) ++ && $show{q(desc)}->[0][0] =~ m/^anschl\.\s+(.*)/) { ++ my $clumpname = $1; ++ ++ delete $show{q(desc)}; ++ $show{q(clumpidx)} = '0/2'; # first of two shows ... ++ push @writebuffer, \%show; ++ ++ my %newshow; ++ foreach(qw(start stop channel)) { $newshow{$_} = $show{$_}; } ++ $newshow{q(clumpidx)} = '1/2'; # second show ... ++ ++ #- $clumpname may contain a extra VPS start time ... ++ if($clumpname =~ s/\s+\(VPS ([012]?[0-9])\.([0-6][0-9])\)//) { ++ $newshow{q(vps-start)} = $newshow{q(start)}; ++ substr($newshow{"vps-start"}, 8, 4) = sprintf("%02d%02d", $1, $2); ++ } ++ ++ warn("title of clumped show contains problematic chars, please take care") ++ if($clumpname =~ m/[,;:\*]/); ++ ++ $newshow{q(title)} = [[ $clumpname, $lang ]]; ++ push @writebuffer, \%newshow; ++ } ++ else { ++ #-- common clumpless show, write out ... ++ push @writebuffer, \%show; ++ } + + last if($grab->{"lasttime"} >= 86400 && !$grab->{"lastday"}); + } +@@ -698,9 +760,10 @@ + my $show = shift; + my @newdesc; + +- #push(@newdesc, $show->{"desc"}->[0][0]) if($show->{"desc"}); + +- if(my @parts = ($$desc =~ m/^\s*(\(([^\)]*)\))?\s+([^,;]+)(,\s+([^,;]+)\s+([12][09][0-9]{2}(?:[\/-][0-9]{2})?))?\s*(?:; (Buch\/Regie|R): ([^;]+))?(; D: (.+))?\s*$/)) { ++ # try to match , ; R: ; D: construct ++ # where / or the [RD]: stuff may be missing ... ++ if(my @parts = ($$desc =~ m/^\s*(\(([^\)]*)\))?\s+([^,;]+)(,\s+([^,;]+)\s+([12][09][0-9]{2}(?:[\/-][0-9]{2})?))?\s*; (?:(?:; )?(Buch\/Regie|R): ([^;]+))?\s*((?:; )?D: (.+))?\s*$/)) { + $$desc = ""; + + #-- $parts[1] is the show title in English (doesn't have to be available) +@@ -806,14 +869,6 @@ + next; + } + +- if (s/\(VPS ([0-2][0-9])\.([0-5][0-9])\)//) { +- # assume that vps begins on the same day as the actual show, +- # thus simply overwrite the stored 'start' information +- # might do trouble when daylight savings time begins/ends +- $show->{"vps-start"} = $show->{"start"}; +- substr($show->{"vps-start"}, 8, 6) = "$1$200"; +- } +- + if (my ($type, $names) = m/^\s*(Reporter:|Moderation:|Kommentar:|Gast:|Gäste:|Mit|Film von)\s+(?!de[nm]\s+)(.*?)\s*$/) { + $names =~ s/\s*u.a.\s*$//; + $names =~ s/\([^\(\)]+\)//g; #-- remove all brackets, that further describe the person +@@ -1002,10 +1057,45 @@ + + + ++#-- get channel logos ++sub get_icons() { ++ my %icons; ++ my $url="http://www.tvtoday.de/tv/programm/programm.php?ztag=0&sparte=alle&uhrzeit=Ax00&sender="; ++ my $chan; ++ my $tag; ++ my $addr; ++ ++ my $bar = new Term::ProgressBar('grabbing icons', scalar(keys(%channels))) ++ if Have_bar && not $opt_quiet; ++ ++ foreach (keys %channels) { ++ my $tb = new HTML::TreeBuilder(); ++ $tb->parse(get_page($url.$_)); ++ $tag = $tb->look_down('_tag' => 'img', ++ sub { ++ return ($_[0]->attr('src') =~ m/^\/tv\/programm\/bilder\/senderlogos\//); ++ }); ++ ++ update $bar if Have_bar && not $opt_quiet; ++ ++ unless(ref($tag) eq "HTML::Element") { ++ $tb->delete; ++ next; ++ }; ++ ++ $icons{$_} = $tag->attr('src'); ++ $tb->delete; ++ } ++ ++ return %icons; ++} ++ ++ ++ + #-- get channel listing + sub get_channels() { + my %channels; +- my $url="http://www.tvtoday.de/tv/programm/programm.php?ztag=0&sparte=alle&uhrzeit=Ax00&sender=ZDF"; ++ my $url="http://www.tvtoday.de/tv/programm/programm.php?ztag=0&sparte=alle&uhrzeit=Ax00&sender=alle"; + + my $tb=new HTML::TreeBuilder(); + $tb->parse(get_page($url));