#!/usr/bin/perl -w # Australian TV Guide XMLTV grabber by Damon Searle # Derived from a yahoo XMLTV grabber by Ron Kellam which was itself... # Derived from original code by Justin Hawkins # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA # 30 Oct 2004 # Damon Searle # - wrote first version # - gets data from NineMSN as a backup. Its not that fancy, # 31 Oct 2004 # Fred Donelly # - added an option so that the output file can be specified on the # command line and from the quick test I gave it, it now works with # mythfilldatabase. # - $offset set to +1000 at the top and then had "+1000" set in a # output string further down rather than the variable # 4 Nov 2004 # Paul Andreassen # - learned some perl and now wants to go back to python # - added and then reduced status info # - retry on failure to getstore # - changed cache to '/var/local/tv_grab_au' # - added threading for each day # 5 Nov 2004 # - improved threading with use of queue # Eyal Lebedinsky # - easier location selection # 8 Nov 2004 # - fixed pid=0 bug # - did some merging, I hate merging # 9 Nov 2004 # Rob Hill # - added Sydney use strict; use Getopt::Long; use XMLTV; use LWP::Simple; use Date::Manip; use File::Path; use threads; use Thread::Queue; # Instructions: # Select your region and source. # If your location isn't listed below, go to # http://tvguide.ninemsn.com.au/guide/ select your area # look at the last number in the URL before ".asp" and set # the region variable below. Then put the channel names as listed # on the tv guide site into the variables below. # Then set your XMLTV ids from the database in the _XMLTVID variables. # # If it doesn't work with mythfilldatabase, try: # ./tv_grab_au # mythfilldatabase --file 1 -1 /var/local/tv_grab_au/guide.xml # pick your region # #my $location = "Brisbane"; #my $location = "Canberra"; my $location = "Sydney"; # pick your source # #my $source = "free"; my $source = "freesd"; #my $source = "freehd"; # choose the XMLID URL suffix that mythtv knows # my $XMLTVID_URL = "d1.com.au"; # change to how you think it should work my $days_to_grab = 7; my $threads = 10; my $retrys = 5; # Variables my $guide_url = "http://tvguide.ninemsn.com.au/guide/"; my $details_url = "http://tvguide.ninemsn.com.au/closeup/default.asp?pid="; my $cache_dir = "/var/local/tv_grab_au"; my $XMLTV_prefix = $source . "." . $location . "."; my $XMLTV_suffix = "." . $XMLTVID_URL; my $region; my $offset; my $ABC; my $Prime; my $SBS; my $Ten; my $WIN; my $ABC_XMLTVID; my $Prime_XMLTVID; my $SBS_XMLTVID; my $Ten_XMLTVID; my $WIN_XMLTVID; if ("Canberra" eq $location) { $region = "126"; $offset = "+1100"; $ABC = "ABC NSW"; $Prime = "Prime Southern"; #Channel 7 in Sydney/Melbourne/etc $SBS = "SBS Sydney"; $Ten = "Southern Cross TEN Capital"; $WIN = "WIN Television NSW"; #Channel 9 in Sydney/Melbourne/etc if ("free" eq $source) { $ABC_XMLTVID = $XMLTV_prefix . "2" . $XMLTV_suffix; $Prime_XMLTVID = $XMLTV_prefix . "PrimS" . $XMLTV_suffix; $SBS_XMLTVID = $XMLTV_prefix . "SBS" . $XMLTV_suffix; $Ten_XMLTVID = $XMLTV_prefix . "10Cap" . $XMLTV_suffix; $WIN_XMLTVID = $XMLTV_prefix . "WIN" . $XMLTV_suffix; } elsif ("freesd" eq $source) { $ABC_XMLTVID = $XMLTV_prefix . "2" . $XMLTV_suffix; $Prime_XMLTVID = $XMLTV_prefix . "7" . $XMLTV_suffix; $SBS_XMLTVID = $XMLTV_prefix . "SBS" . $XMLTV_suffix; $Ten_XMLTVID = $XMLTV_prefix . "10" . $XMLTV_suffix; $WIN_XMLTVID = $XMLTV_prefix . "9" . $XMLTV_suffix; } else { print "Unknows source '$source' for $location\n"; exit (1); } } elsif ("Brisbane" eq $location) { $region = "79"; $offset = "+1000"; $ABC = "ABC QLD"; $Prime = "Channel Seven Queensland"; $SBS = "SBS Queensland"; $Ten = "Southern Cross TEN Queensland"; $WIN = "WIN Television QLD"; if ("free" eq $source) { $ABC_XMLTVID = $XMLTV_prefix . "2" . $XMLTV_suffix; $Prime_XMLTVID = $XMLTV_prefix . "7" . $XMLTV_suffix; $SBS_XMLTVID = $XMLTV_prefix . "SBS" . $XMLTV_suffix; $Ten_XMLTVID = $XMLTV_prefix . "10" . $XMLTV_suffix; $WIN_XMLTVID = $XMLTV_prefix . "9" . $XMLTV_suffix; } else { print "Unknows source '$source' for $location\n"; exit (1); } } elsif ("Sydney" eq $location) { $region = "73"; $offset = "+1000"; $ABC = "ABC NSW"; $Prime = "Channel Seven Sydney"; $SBS = "SBS Sydney"; $Ten = "Network TEN Sydney"; $WIN = "Channel Nine Sydney"; if ("free" eq $source) { $ABC_XMLTVID = $XMLTV_prefix . "2" . $XMLTV_suffix; $Prime_XMLTVID = $XMLTV_prefix . "7" . $XMLTV_suffix; $SBS_XMLTVID = $XMLTV_prefix . "SBS" . $XMLTV_suffix; $Ten_XMLTVID = $XMLTV_prefix . "10" . $XMLTV_suffix; $WIN_XMLTVID = $XMLTV_prefix . "9" . $XMLTV_suffix; } elsif ("freesd" eq $source) { $ABC_XMLTVID = $XMLTV_prefix . "2" . $XMLTV_suffix; $Prime_XMLTVID = $XMLTV_prefix . "7" . $XMLTV_suffix; $SBS_XMLTVID = $XMLTV_prefix . "SBS" . $XMLTV_suffix; $Ten_XMLTVID = $XMLTV_prefix . "10" . $XMLTV_suffix; $WIN_XMLTVID = $XMLTV_prefix . "9" . $XMLTV_suffix; } else { print "Unknows source '$source' for $location\n"; exit (1); } } else { print "Unknows location '$location'\n"; exit (1); } my $prog_ref; my $chan_ref; $$chan_ref{$ABC} = { 'id' => $ABC_XMLTVID, 'display-name' => [ [ $ABC, undef ]]}; $$chan_ref{$Prime} = { 'id' => $Prime_XMLTVID, 'display-name' => [ [ $Prime, undef ]]}; $$chan_ref{$SBS} = { 'id' => $SBS_XMLTVID, 'display-name' => [ [ $SBS, undef ]]}; $$chan_ref{$Ten} = { 'id' => $Ten_XMLTVID, 'display-name' => [ [ $Ten, undef ]]}; $$chan_ref{$WIN} = { 'id' => $WIN_XMLTVID, 'display-name' => [ [ $WIN, undef ]]}; # Options my $opt_days; my $opt_output; my $opt_config; GetOptions('days=i' => \$opt_days, 'output=s' => \$opt_output, 'config=s' => \$opt_config, ); if ($opt_days) { $days_to_grab = $opt_days } if (!($opt_output)) { $opt_output = $cache_dir . "/guide.xml"; } # $opt_config should probably do something print "$days_to_grab, $opt_output\n"; print "starting $threads threads\n"; my @thrlist; my $datepids = Thread::Queue->new; for (my $thread=0; $thread<$threads; $thread++) { push @thrlist, threads->new(\&fetch_details); } print "loading queue\n"; my $currentday = &ParseDate("today"); my $day_counter = 1; while ($day_counter <= $days_to_grab) { my $date = &UnixDate($currentday, "%d%m%Y"); my @day_lines = get_day($date); my @pids; foreach my $line (@day_lines) { foreach my $link (split /\n|tr|TR|TD|tr/, $line ) { if ($link =~ /closeup\/default.asp/) { $link =~ s/.+pid=//g; $link =~ s/".+//g; if ($link =~ /\d\d+/) { $datepids->enqueue($date . "-" . $link); } } } } $day_counter++; $currentday = &DateCalc($currentday, "+ 1 day"); } for (my $thread=0; $thread<$threads; $thread++) { $datepids->enqueue(0 . "-" . 0); } print "queue is complete\n"; foreach my $thr (@thrlist) { $thr->join; } print "all threads done\n"; $currentday = &ParseDate("today"); $day_counter = 1; while ($day_counter <= $days_to_grab) { my $date = &UnixDate($currentday, "%d%m%Y"); my @day_lines = get_day($date); my @pids; foreach my $line (@day_lines) { foreach my $link (split /\n|tr|TR|TD|tr/, $line ) { if ($link =~ /closeup\/default.asp/) { $link =~ s/.+pid=//g; $link =~ s/".+//g; if ($link =~ /\d\d+/) { push @pids, $link; } } } } foreach my $pid (@pids) { my $retry = 0; my @details = get_details($date, $pid); my $show_details_table = ""; my $use_line = 0; foreach my $line (@details) { if ($line =~ /bgColor=#f7f3e8/) { $use_line = 0; } if ($use_line == 1) { $show_details_table .= $line; } if ($line =~ /bgcolor=#ffffff/) { $use_line = 1; } } $show_details_table =~ s/<[^>]*>/\n/g; $show_details_table =~ s/\ \;//g; #$show_details_table =~ s/
||<\/B><\/b>/\n/g; #$show_details_table =~ s/Genre://g; #$show_details_table =~ s/Rated:/\n/g; my $count = 0; my $channel = ""; my $start_date = &UnixDate($currentday, "%Y-%m-%d"); my $time; my $title1 = ""; my $title2 = ""; my $genre = ""; my $descr = ""; my $details = ""; my $duration; #print $show_details_table. "\n\n\n"; foreach my $line (split /\n/, $show_details_table) { if ($count == 4){ #print "Time: " . $line . "\n"; $time = $line; } elsif ($count == 7){ $channel = $line; #print "Channel: " . $line . "\n"; } elsif ($count == 19){ $title1 = $line; #print "Program: " . $line . "\n"; } elsif ($count == 20){ $line =~ s/ - //g; $title2 = $line; #print "Subtitle: " . $line . "\n"; } elsif ($count == 21){ $line =~ s/\D//g; $duration = $line; #print "Run time: " . $line . "\n"; } elsif ($count == 22){ $line =~ s/[^A-Z]//g; $details = $line; #print "Rating: " . $line . "\n"; } elsif ($count == 26){ $line =~ s/ //g; $genre = $line; #print "Genre: " . $line . "\n"; } elsif ($count == 28 && $line =~ /[a-zA-Z]/){ $descr = $line; #print "Description: " . $line . "\n"; } #elsif ($count == 26 && $line =~ /[a-zA-Z]/){ # $descr = $line; # print "Description: " . $line . "\n"; #} #print $count .": " . $line . "\n"; ++$count; } if (($count < 21) and ($retry < $retrys)) { $retry++; my $name = $cache_dir . "/" . $date . "/" . $pid . ".html"; print " $name is too short, removing and trying again\n"; unlink $name; push @pids, $pid; next; } my $start_time = &UnixDate($time, "%H:%M"); # my $start_datetime = $start_date . " " . $start_time; if ($start_time =~ /00:|01:|02:|03:|04:|05:/) { $start_date = &DateCalc($start_date, "+ 1 day"); } $start_date = &UnixDate($start_date, "%Y%m%d"); my $end_time = &DateCalc($start_time, " + " . $duration . "minutes"); $end_time = &UnixDate($end_time, "%H:%M"); my $end_date; if (&Date_Cmp($start_time, $end_time) <= 0) { $end_date = $start_date; } else { my $err; my $edate = &DateCalc($start_date, "+ 1 day", \$err); $end_date = &UnixDate($edate, "%Y%m%d"); } if ($channel =~ /$ABC/) { $channel = $ABC_XMLTVID; } elsif ($channel =~ /$Prime/) { $channel = $Prime_XMLTVID; } elsif ($channel =~ /$SBS/) { $channel = $SBS_XMLTVID; } elsif ($channel =~ /$Ten/) { $channel = $Ten_XMLTVID; } elsif ($channel =~ /$WIN/) { $channel = $WIN_XMLTVID; } my $start; my $stop; $start = $start_date . &UnixDate($start_time,"%H%M") . "00 " . $offset; $stop = $end_date . &UnixDate($end_time,"%H%M") . "00 " . $offset; my $a_prog = { channel => $channel, start => $start, stop => $stop, title => [ [ $title1, undef ] ] }; $descr =~ s/^\s+//; $descr =~ s/\s+$//; if ($title2) { $$a_prog{'sub-title'} = [ [ $title2, undef ] ]; } if ($descr) { $$a_prog{desc} = [ [ $descr, undef ] ]; } if ($genre) { $$a_prog{category} = [ [ $genre, undef ] ]; } push @$prog_ref, $a_prog; } $currentday = &DateCalc($currentday, "+ 1 day"); $day_counter++; } my $data = [ 'ISO-8859-1', { 'source-info-name' => 'http://tvguide.ninemsn.com.au/', 'generator-info-name' => 'NineMSN grabber', 'generator-info-url' => '', 'generator-info-name' => "XMLTV - tv_grab_au NineMSN v0.2" }, $chan_ref, $prog_ref ]; my $fh = new IO::File ">$opt_output"; XMLTV::write_data($data, OUTPUT=>$fh); # subroutines sub get_day { my $date = shift; my $url = $guide_url . $date . "_" . $region . ".asp"; my $guide_dir = $cache_dir . "/" . $date; my $guide_file = $guide_dir . "/guide.html"; mkpath ($guide_dir); if (!(-e $guide_file)) { getstore($url, $guide_file); print "."; for (my $retry=1; (!(-e $guide_file)) && ($retry<$retrys); $retry++) { getstore($url, $guide_file); print ":"; } } open(GUIDE, $guide_file) or print "\nCan't open $guide_file\n"; my @guide_lines = ; close(GUIDE); return @guide_lines; } sub get_details { my $date = shift; my $program_id = shift; my $url = $details_url . $program_id; my $guide_dir = $cache_dir . "/" . $date; my $details_file = $guide_dir . "/" . $program_id . ".html"; mkpath ($guide_dir); if (!(-e $details_file)) { getstore($url, $details_file); print "."; for (my $retry=1; (!(-e $details_file)) && ($retry<$retrys); $retry++) { getstore($url, $details_file); print ":"; } } open(DETAILS, $details_file) or print "\nCan't open $details_file\n"; my @details_lines =
; close(DETAILS); return @details_lines; } sub fetch_details { my $datepid=$datepids->dequeue; my @datepidl=split /-/, $datepid; my $date = $datepidl[0]; my $pid = $datepidl[1]; while (($date!=0) and ($pid!=0)) { my $guide_dir = $cache_dir . "/" . $date; mkpath ($guide_dir); my $url = $details_url . $pid; my $details_file = $guide_dir . "/" . $pid . ".html"; for (my $retry=0; (!(-e $details_file)) && ($retry<$retrys); $retry++) { getstore($url, $details_file); } $datepid=$datepids->dequeue; @datepidl=split /-/, $datepid; $date = $datepidl[0]; $pid = $datepidl[1]; } }