[mythtv-users] New Australian XMLTV grabber
Eyal Lebedinsky
eyal at eyal.emu.id.au
Wed Nov 17 12:33:20 UTC 2004
Michael Cheshire (Mailing Lists) wrote:
> The great script, updated with foxtel channels.
If I did not break anything else then this is the same thing, as an attachment
so that whitespace is not lost, and with one misspelling fixed...
--
Eyal Lebedinsky (eyal at eyal.emu.id.au) <http://samba.org/eyal/>
-------------- next part --------------
#!/usr/bin/perl -w
# Australian TV Guide XMLTV grabber by Damon Searle
# Derived from a yahoo XMLTV grabber by Ron Kellam which was itself...
# Derived from original code by Justin Hawkins
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
# 30 Oct 2004
# Damon Searle <djsearle at netspace.net.au>
# - wrote first version
# - gets data from NineMSN as a backup. Its not that fancy,
# 31 Oct 2004
# Fred Donelly <fdonelly at hotmail.com>
# - added an option so that the output file can be specified on the
# command line and from the quick test I gave it, it now works with
# mythfilldatabase.
# - $offset set to +1000 at the top and then had "+1000" set in a
# output string further down rather than the variable
# 4 Nov 2004
# Paul Andreassen <paulx at andreassen.com.au>
# - learned some perl and now wants to go back to python
# - added and then reduced status info
# - retry on failure to getstore
# - changed cache to '/var/local/tv_grab_au'
# - added threading for each day
# 5 Nov 2004
# - improved threading with use of queue
# Eyal Lebedinsky <eyal at eyal.emu.id.au>
# - easier location selection
# 8 Nov 2004
# Paul
# - fixed pid=0 bug
# - did some merging, I hate merging
# 9 Nov 2004
# Rob Hill <rob at dot.net.au>
# - added Sydney
# 10 Nov 2004
# Mary Wright <mwright at taz-devil.dyndns.org>
# - digital info for Sydney
# Paul
# - more cleanup and improved error checking
# - used mirror instead of getstore to get any updates
# - mirror didn't work replaced with own smarts to check for updates to times
# 11 Nov 2004
# - added program name in check
# 13 Nov 2004
# - added freesd for Brisbane
# 14 Nov 2004
# - --configure to exit nicely
# - if no program data then skip program nicely, mainly for foxtel data
# - added foxtel channels
# 17 Nov 2004
# - added remaining foxtel channels
# Eyal Lebedinsky <eyal at eyal.emu.id.au>
# - Fix misspelling Unknows -> Unknown
# - Note: is Sydney now is on summer time +1100?
use strict;
use Getopt::Long;
use XMLTV;
use LWP::Simple;
use Date::Manip;
use File::Path;
use threads;
use Thread::Queue;
# Instructions:
# Select your region and source.
# If your location isn't listed below, go to
# http://tvguide.ninemsn.com.au/guide/ select your area
# look at the last number in the URL before ".asp" and set
# the region variable below. Then put the channel names as listed
# on the tv guide site into the variables below.
# Then set your XMLTV ids from the database in the XMLTVID_URL variable.
#
# If it doesn't work with mythfilldatabase, try:
# ./tv_grab_au
# mythfilldatabase --file 1 -1 /var/local/tv_grab_au/guide.xml
# pick your region
#
my $location = "Canberra";
#my $location = "Brisbane";
#my $location = "Sydney";
#my $location = "Australia";
# pick your source
#
my $source = "free";
#my $source = "freesd";
#my $source = "freehd";
#my $source = "foxtel";
# choose the XMLID URL suffix that mythtv knows
#
my $XMLTVID_URL = "d1.com.au";
# change to how you think it should work
my $days_to_grab = 7;
my $threads = 5;
my $retrys = 3;
my $secondsbeforeretry = 2;
# Variables
my $guide_url = "http://tvguide.ninemsn.com.au/guide/";
my $details_url = "http://tvguide.ninemsn.com.au/closeup/default.asp?pid=";
my $cache_dir = "/var/local/tv_grab_au";
my $XMLTV_prefix = $source . "." . $location . ".";
my $XMLTV_suffix = "." . $XMLTVID_URL;
my $region; my $offset;
my %channels;
if ("Canberra" eq $location) {
$region = "126";
$offset = "+1100";
if ("free" eq $source) {
$channels{"ABC NSW"}="2";
$channels{"Prime Southern"}="PrimS";
$channels{"SBS Sydney"}="SBS";
$channels{"Southern Cross TEN Capital"}="10Cap";
$channels{"WIN Television NSW"}="WIN"
} elsif ("freesd" eq $source or "freehd" eq $source) {
$channels{"ABC NSW"}="2";
$channels{"Prime Southern"}="7";
$channels{"SBS Sydney"}="SBS";
$channels{"Southern Cross TEN Capital"}="10";
$channels{"WIN Television NSW"}="9"
} else {
print "Unknown source '$source' for $location\n";
exit (1);
}
} elsif ("Brisbane" eq $location) {
$region = "79";
$offset = "+1000";
if (("free" eq $source)||("freesd" eq $source)) {
$channels{"ABC QLD"}="2";
$channels{"Channel Seven Queensland"}="7";
$channels{"SBS Queensland"}="SBS";
$channels{"Southern Cross TEN Queensland"}="10";
$channels{"WIN Television QLD"}="9";
} else {
print "Unknown source '$source' for $location\n";
exit (1);
}
} elsif ("Sydney" eq $location) {
$region = "73";
$offset = "+1100";
if (("free" eq $source)||("freesd" eq $source)) {
$channels{"ABC NSW"}="2";
$channels{"Channel Seven Sydney"}="7";
$channels{"SBS Sydney"}="SBS";
$channels{"Network TEN Sydney"}="10";
$channels{"Channel Nine Sydney"}="9";
} else {
print "Unknown source '$source' for $location\n";
exit (1);
}
} elsif ("Adelaide" eq $location) {
$region = "81";
$offset = "+0930";
if (("free" eq $source)||("freesd" eq $source)) {
$channels{"ABC SA"}="2";
$channels{"Channel Seven Adelaide"}="7";
$channels{"SBS"}="SBS";
$channels{"Network TEN Adekaude"}="10";
$channels{"Channel Nine Adekaude"}="9";
} else {
print "Unknown source '$source' for $location\n";
exit (1);
}
} elsif ("Australia" eq $location) {
$region = "123";
$offset = "+0930";
if ("foxtel" eq $source) {
$channels{"Arena TV"}="Arena";
$channels{"BBC World"}="BBC";
$channels{"Cartoon Network"}="Cartoon";
$channels{"Channel [V]"}="Red";
$channels{"CNBC"}="CNBC";
$channels{"CNN"}="CNN";
$channels{"Discovery Channel"}="Disc";
$channels{"FOX News"}="FoxFNC";
$channels{"FOX8"}="FOX";
$channels{"MAX"}="FoxMMX";
$channels{"National Geographic Channel"}="NatGe";
$channels{"Nickelodeon"}="Nick";
$channels{"Showtime"}="Show";
$channels{"Showtime 2"}="FoxSH2";
$channels{"Sky News"}="SkyNews";
$channels{"TV1"}="TV1";
$channels{"UKTV"}="UKTV";
$channels{"Showtime Greats"}="ShowGreats";
$channels{"World Movies"}="wmov";
$channels{"WCH"}="WCH";
$channels{"TVSN"}="TVSN";
$channels{"Sky Racing"}="SkyRa";
$channels{"Ovation"}="Ovation";
$channels{"Disney Channel"}="Disney";
$channels{"Animal Planet"}="Animal";
$channels{"The Comedy Channel"}="Com";
$channels{"The LifeStyle Channel"}="Lifes";
$channels{"FOX Sports 1"}="FoxFS1";
$channels{"Movie One"}="Movie1";
$channels{"TCM"}="TCM";
$channels{"MTV"}="MTV";
$channels{"FOX Sports 2"}="FoxSP2";
$channels{"FOX Footy Channel"}="FFC";
$channels{"Movie Extra"}="MovieEx";
$channels{"Hallmark Channel"}="Hall";
$channels{"The History Channel"}="FoxHST";
$channels{"ESPN"}="ESPN";
$channels{"FOX Classics"}="FoxCLA";
$channels{"Movie Greats"}="MovieGr";
} else {
print "Unknown source '$source' for $location\n";
exit (1);
}
} else {
print "Unknown location '$location'\n";
exit (1);
}
my $prog_ref;
my $chan_ref;
foreach my $channel (keys %channels)
{
$$chan_ref{$channel} =
{
'id' => $XMLTV_prefix . $channels{$channel} . $XMLTV_suffix,
'display-name' => [ [ $channel, undef ]]
};
}
# Options
my $opt_days;
my $opt_output;
my $opt_configfile;
my $opt_configure = 0;
GetOptions('days=i' => \$opt_days,
'output=s' => \$opt_output,
'config-file=s' => \$opt_configfile,
'configure' => \$opt_configure,
);
if ($opt_days) {
$days_to_grab = $opt_days
}
if (!($opt_output)) {
$opt_output = $cache_dir . "/guide.xml";
}
# $opt_configfile should probably do something ('/home/mythtv/.mythtv/tv_grab_au.xmltv')
if ($opt_configure == 1)
{
print "configuration must be done in this script $0\n";
exit (0);
}
print "grabing $days_to_grab days into $opt_output\n";
print "starting $threads threads\n";
my @thrlist;
my $datepids = Thread::Queue->new;
for (my $thread=0; $thread<$threads; $thread++)
{
push @thrlist, threads->new(\&fetch_details);
}
print "loading queue\n";
my $currentday = &ParseDate("today");
my $day_counter = 1;
while ($day_counter <= $days_to_grab)
{
my $date = &UnixDate($currentday, "%d%m%Y");
my @day_lines = get_day($date,1);
if (@day_lines == 0)
{
$currentday = &DateCalc($currentday, "+ 1 day");
$day_counter++;
next;
}
my @pids;
my @rowspans;
my @names;
foreach my $line (@day_lines)
{
foreach my $link (split /\n|tr|TR|TD|tr/, $line )
{
if ($link =~ /closeup\/default.asp/)
{
my $rowspan = $link;
$rowspan =~ s/.+rowspan=//g;
$rowspan =~ s/ .+//g;
my $name = $link;
$name =~ s/.+target=new>(<P>|)//g;
$name =~ s/<\/a>.+//g;
$link =~ s/.+pid=//g;
$link =~ s/".+//g; #"
if (($rowspan =~ /\d+/) and ($link =~ /\d\d+/))
{
push @pids, $link;
push @rowspans, $rowspan;
push @names, $name;
}
}
}
}
if (changed_guide($date, at pids, at rowspans, at names))
{
for (my $count=0; $count <= $#pids; $count++)
{
$datepids->enqueue($date . "-" . $pids[$count]);
}
}
$day_counter++;
$currentday = &DateCalc($currentday, "+ 1 day");
}
for (my $thread=0; $thread<$threads; $thread++)
{
$datepids->enqueue(0 . "-" . 0);
}
print "queue is complete\n";
foreach my $thr (@thrlist)
{
$thr->join;
}
print "all threads done\n";
print "building xml structure\n";
$currentday = &ParseDate("today");
$day_counter = 1;
while ($day_counter <= $days_to_grab)
{
my @pids;
my $date = &UnixDate($currentday, "%d%m%Y");
my $guide_prn_file = $cache_dir . "/" . $date . "/guide.prn";
if (open(PRN, $guide_prn_file))
{
my @prn = split />/, <PRN>;
close(PRN);
if ($#prn > 1)
{
my $pidlast = ($#prn + 1)/3 - 1;
@pids=@prn[0..$pidlast];
}
else
{
print "no pids in $guide_prn_file\n";
@pids=();
}
}
else
{
print "can't read $guide_prn_file\n";
@pids=()
}
my $retry = 0;
foreach my $pid (@pids)
{
my @details = get_details($date, $pid);
if (@details == 0)
{
next;
}
my $show_details_table = "";
my $use_line = 0;
my $close_html = 0;
foreach my $line (@details)
{
if ($line =~ /bgColor=#f7f3e8/)
{
$use_line = 0;
}
if ($use_line == 1)
{
$show_details_table .= $line;
}
if ($line =~ /bgcolor=#ffffff/)
{
$use_line = 1;
}
if ($line =~ /<\/HTML>/)
{
$close_html = 1;
}
}
if ($close_html == 0)
{
my $name = $cache_dir . "/" . $date . "/" . $pid . ".html";
if ($retry++ >= $retrys)
{
print "giving up on truncated $name\n";
$retry=0;
next;
}
unlink $name;
push @pids, $pid;
print "t"; # truncated
sleep($secondsbeforeretry);
next;
}
if ((length $show_details_table) == 0)
{
print "m"; # missing: can't do anything about this
$retry=0;
next;
}
$show_details_table =~ s/<[^>]*>/\n/g;
$show_details_table =~ s/\ \;//g;
#$show_details_table =~ s/<BR>|<TR>|<TD><B><b><\/B><\/b>/\n/g;
#$show_details_table =~ s/Genre://g;
#$show_details_table =~ s/Rated:/\n/g;
my $count = 0;
my $channel = "";
my $start_date = &UnixDate($currentday, "%Y-%m-%d");
my $time;
my $title1 = "";
my $title2 = "";
my $genre = "";
my $descr = "";
my $details = "";
my $duration;
#print $show_details_table. "\n\n\n";
foreach my $line (split /\n/, $show_details_table)
{
if ($count == 4){
#print "Time: " . $line . "\n";
$time = $line;
}
elsif ($count == 7){
$channel = $line;
#print "Channel: " . $line . "\n";
}
elsif ($count == 19){
$title1 = $line;
#print "Program: " . $line . "\n";
}
elsif ($count == 20){
$line =~ s/ - //g;
$title2 = $line;
#print "Subtitle: " . $line . "\n";
}
elsif ($count == 21){
$line =~ s/\D//g;
$duration = $line;
#print "Run time: " . $line . "\n";
}
elsif ($count == 22){
$line =~ s/[^A-Z]//g;
$details = $line;
#print "Rating: " . $line . "\n";
}
elsif ($count == 26){
$line =~ s/ //g;
$genre = $line;
#print "Genre: " . $line . "\n";
}
elsif ($count == 28 && $line =~ /[a-zA-Z]/){
$descr = $line;
#print "Description: " . $line . "\n";
}
#elsif ($count == 26 && $line =~ /[a-zA-Z]/){
# $descr = $line;
# print "Description: " . $line . "\n";
#}
#print $count .": " . $line . "\n";
++$count;
}
my $start_time = &UnixDate($time, "%H:%M");
# my $start_datetime = $start_date . " " . $start_time;
if ($start_time =~ /00:|01:|02:|03:|04:|05:/)
{
$start_date = &DateCalc($start_date, "+ 1 day");
}
$start_date = &UnixDate($start_date, "%Y%m%d");
my $end_time = &DateCalc($start_time, " + " . $duration . "minutes");
$end_time = &UnixDate($end_time, "%H:%M");
my $end_date;
if (&Date_Cmp($start_time, $end_time) <= 0)
{
$end_date = $start_date;
}
else
{
my $err;
my $edate = &DateCalc($start_date, "+ 1 day", \$err);
$end_date = &UnixDate($edate, "%Y%m%d");
}
if (defined $channels{$channel})
{
$channel = $XMLTV_prefix . $channels{$channel} . $XMLTV_suffix;
}
else
{
print "unknown channel $channel\n";
$retry=0;
next;
}
my $start;
my $stop;
$start = $start_date . &UnixDate($start_time,"%H%M") . "00 " . $offset;
$stop = $end_date . &UnixDate($end_time,"%H%M") . "00 " . $offset;
my $a_prog = {
channel => $channel,
start => $start,
stop => $stop,
title => [ [ $title1, undef ] ]
};
$descr =~ s/^\s+//;
$descr =~ s/\s+$//;
if ($title2) { $$a_prog{'sub-title'} = [ [ $title2, undef ] ]; }
if ($descr) { $$a_prog{desc} = [ [ $descr, undef ] ]; }
if ($genre) { $$a_prog{category} = [ [ $genre, undef ] ]; }
push @$prog_ref, $a_prog;
$retry=0;
}
$currentday = &DateCalc($currentday, "+ 1 day");
$day_counter++;
}
my $data = [
'ISO-8859-1',
{
'source-info-name' => 'http://tvguide.ninemsn.com.au/',
'generator-info-name' => 'NineMSN grabber',
'generator-info-url' => '',
'generator-info-name' => "XMLTV - tv_grab_au NineMSN v0.2"
},
$chan_ref,
$prog_ref
];
my $hour=&UnixDate(&ParseDate("now"),"%H");
if ($hour < 6)
{
print "can't update between 0:00 and 6:00\n";
# If we update between these hours we lose any data we had up to 6:00.
# This is because the web site starts a day at 6:00 and ends at 6:00 the next day
# This could be fixed by read the previous days info and adding the needed shows.
# I did try adding the whole previous day but got lots of mythfilldatabase errors.
exit(1);
}
print "writing file\n";
my $fh = new IO::File ">$opt_output";
XMLTV::write_data($data, OUTPUT=>$fh);
print "done\n";
# subroutines
sub get_day
{
my $date = shift;
my $force = shift;
my $url = $guide_url . $date . "_" . $region . ".asp";
my $guide_dir = $cache_dir . "/" . $date;
my $guide_file = $guide_dir . "/guide.html";
mkpath ($guide_dir);
for (my $retry=0; (($force==1) || (!(-e $guide_file))) && is_error(getstore($url, $guide_file)) && ($retry<$retrys); $retry++)
{
print ".";
sleep($secondsbeforeretry);
}
my @guide_lines;
if (open(GUIDE, $guide_file))
{
@guide_lines = <GUIDE>;
close(GUIDE);
}
else
{
@guide_lines = ();
print "giving up on $guide_file\n";
}
return @guide_lines;
}
sub get_details
{
my $date = shift;
my $program_id = shift;
my $url = $details_url . $program_id;
my $guide_dir = $cache_dir . "/" . $date;
my $details_file = $guide_dir . "/" . $program_id . ".html";
mkpath ($guide_dir);
for (my $retry=0; (!(-e $details_file)) && is_error(getstore($url, $details_file)) && ($retry<$retrys); $retry++)
{
print ".";
sleep($secondsbeforeretry);
}
my @details_lines;
if (open(DETAILS, $details_file))
{
@details_lines = <DETAILS>;
close(DETAILS);
}
else
{
@details_lines = ();
print "giving up on $details_file\n";
}
return @details_lines;
}
sub fetch_details
{
my $datepid=$datepids->dequeue;
my @datepidl=split /-/, $datepid;
my $date = $datepidl[0];
my $pid = $datepidl[1];
while (($date!=0) and ($pid!=0))
{
my $guide_dir = $cache_dir . "/" . $date;
mkpath ($guide_dir);
my $url = $details_url . $pid;
my $details_file = $guide_dir . "/" . $pid . ".html";
for (my $retry=0; is_error(getstore($url, $details_file)) && ($retry<$retrys); $retry++)
{
sleep($secondsbeforeretry);
}
$datepid=$datepids->dequeue;
@datepidl=split /-/, $datepid;
$date = $datepidl[0];
$pid = $datepidl[1];
}
}
sub changed_guide
{
my $date = shift;
my @pidsrowspansnames = @_;
my $guide_prn_file = $cache_dir . "/" . $date . "/guide.prn";
if (open(PRN, $guide_prn_file))
{
my @prn = split />/, <PRN>;
close(PRN);
if (($#prn > 1) and ($#prn == $#pidsrowspansnames))
{
my $count;
my $diff = ((($#prn+1)*2)/3)-1;
for ($count=0; ($count <= $diff) && ($prn[$count]==$pidsrowspansnames[$count]); $count++)
{ }
if ($count==($diff+1))
{
for (; ($count <= $#prn) && ($prn[$count] eq $pidsrowspansnames[$count]); $count++)
{ }
if ($count==($#prn+1))
{
print "$date unchanged\n";
return 0;
}
}
}
}
print "$date downloading\n";
if (open(PRN, ">", $guide_prn_file))
{
for (my $count=0; $count<$#pidsrowspansnames; $count++)
{
print PRN "$pidsrowspansnames[$count]>";
}
print PRN "$pidsrowspansnames[$#pidsrowspansnames]";
close(PRN);
}
else
{
print "can't open for writing $guide_prn_file\n";
}
return 1;
}
More information about the mythtv-users
mailing list