[mythtv-users] New Australian XMLTV grabber
Fred Donelly
fdonelly at hotmail.com
Sun Oct 31 11:23:27 UTC 2004
Skipped content of type multipart/alternative-------------- next part --------------
#!/usr/bin/perl -w
# Australian TV Guide XMLTV grabber by Damon Searle
# Derived from a yahoo XMLTV grabber by Ron Kellam which was itself...
# Derived from original code by Justin Hawkins
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
# *** Only tested with ACT data ***
# Instructions:
# Go to http://tvguide.ninemsn.com.au/guide/ select your area
# Look at the last number in the URL before ".asp" and set
# the region variable below. Then put the channel names as listed
# on the tv guide site into the variables below.
# Then set your XMLTV ids from the database in the _XMLTVID variables.
# If it doesn't work with mythfilldatabase, try:
# tv_grab_au
# mythfilldatabase 1 -1 /tmp/tvguide/guide.xml
use strict;
use Getopt::Long;
use XMLTV;
use LWP::Simple;
use Date::Manip;
use File::Path;
# ---------------------------------------------------------------------------
# Settings (edit these to suit your region and channel line-up)
# ---------------------------------------------------------------------------

# Number of days of listings to fetch unless --days says otherwise.
my $days_to_grab = 7;

# Region number as it appears in the guide URL (126 = ACT).
my $region = '126';

# Base URL of the per-day guide pages and of the per-programme detail pages.
my $guide_url   = 'http://tvguide.ninemsn.com.au/guide/';
my $details_url = 'http://tvguide.ninemsn.com.au/closeup/default.asp?pid=';

# Downloaded pages are kept here; it is also where guide.xml goes by default.
my $cache_dir = '/tmp/tvguide';

# UTC offset appended verbatim to every start/stop time that is written.
my $offset = '+1100';

# Channel names exactly as the TV guide site lists them.
my $ABC   = 'ABC NSW';
my $Prime = 'Prime Southern';                # Channel 7 in Sydney/Melbourne/etc
my $SBS   = 'SBS Sydney';
my $Ten   = 'Southern Cross TEN Capital';
my $WIN   = 'WIN Television NSW';            # Channel 9 in Sydney/Melbourne/etc

# XMLTV ids for the channels above.
# Alternative "freesd" ids, left disabled:
#my $ABC_XMLTVID = "freesd.Canberra.2.d1.com.au";
#my $Prime_XMLTVID = "freesd.Canberra.7.d1.com.au";
#my $SBS_XMLTVID = "freesd.Canberra.SBS.d1.com.au";
#my $Ten_XMLTVID = "freesd.Canberra.10.d1.com.au";
#my $WIN_XMLTVID = "freesd.Canberra.9.d1.com.au";
my $ABC_XMLTVID   = 'free.Canberra.2.d1.com.au';
my $Prime_XMLTVID = 'free.Canberra.PrimS.d1.com.au';
my $SBS_XMLTVID   = 'free.Canberra.SBS.d1.com.au';
my $Ten_XMLTVID   = 'free.Canberra.10Cap.d1.com.au';
my $WIN_XMLTVID   = 'free.Canberra.WIN.d1.com.au';
# Command-line overrides:
#   --days N       number of days to grab (replaces $days_to_grab when non-zero)
#   --output FILE  where to write the XMLTV file (default: <cache dir>/guide.xml)
# The result of GetOptions is not checked, so options this script does not
# know about do not stop the run.
my ($opt_days, $opt_output);
GetOptions(
    'days=i'   => \$opt_days,
    'output=s' => \$opt_output,
);

$days_to_grab = $opt_days if $opt_days;
$opt_output   = $cache_dir . "/guide.xml" unless $opt_output;

# Report the effective settings.
print "$days_to_grab, $opt_output\n";
#exit(0);
# The first day to grab; the main loop advances this one day at a time.
my $currentday = ParseDate("today");

# Accumulators handed to XMLTV::write_data at the end of the run:
#   $prog_ref - array ref of programme hashes, filled in by the main loop
#   $chan_ref - hash ref of channel hashes, keyed by the guide's channel name
my $prog_ref;
my $chan_ref;

# Every configured channel gets an entry carrying its XMLTV id and a single
# display name (the guide's own name, with no language attached).
foreach my $channel_entry (
    [ $ABC,   $ABC_XMLTVID ],
    [ $Prime, $Prime_XMLTVID ],
    [ $SBS,   $SBS_XMLTVID ],
    [ $Ten,   $Ten_XMLTVID ],
    [ $WIN,   $WIN_XMLTVID ],
) {
    my ($guide_name, $xmltv_id) = @$channel_entry;
    $chan_ref->{$guide_name} = {
        'id'           => $xmltv_id,
        'display-name' => [ [ $guide_name, undef ] ],
    };
}
# ---------------------------------------------------------------------------
# Main grab loop.
#
# For each requested day: fetch the day's guide page, collect the programme
# ids (pids) it links to, fetch and parse each programme's detail page, and
# append one XMLTV programme hash per programme to @$prog_ref.
# ---------------------------------------------------------------------------

# Guide channel name => XMLTV id, tested in this order; the first name found
# in the scraped channel text wins.
my @channel_map = (
    [ $ABC,   $ABC_XMLTVID ],
    [ $Prime, $Prime_XMLTVID ],
    [ $SBS,   $SBS_XMLTVID ],
    [ $Ten,   $Ten_XMLTVID ],
    [ $WIN,   $WIN_XMLTVID ],
);

my $day_counter = 1;
while ($day_counter <= $days_to_grab)
{
    # Guide pages and cache directories are named by date in ddmmyyyy form.
    my $date = UnixDate($currentday, "%d%m%Y");
    my @pids = _extract_pids(get_day($date));

    PID:
    foreach my $pid (@pids)
    {
        my $show = _parse_details(get_details($date, $pid));

        # A page that failed to download, or whose layout differs from what
        # the parser expects, yields empty fields.  Skip it rather than emit
        # a programme with undefined start/stop times.
        my @missing = grep { $show->{$_} eq "" } qw(time channel title duration);
        if (@missing)
        {
            warn "Skipping programme $pid: no " . join(", ", @missing)
                . " found in its details page\n";
            next PID;
        }

        my $start_time = UnixDate($show->{time}, "%H:%M");
        if (!$start_time)
        {
            warn "Skipping programme $pid: cannot parse start time '"
                . $show->{time} . "'\n";
            next PID;
        }

        # Programmes listed on this day's page that start between 00:00 and
        # 05:59 are dated on the following calendar day.
        my $start_date = UnixDate($currentday, "%Y-%m-%d");
        if ($start_time =~ /\A0[0-5]:/)
        {
            $start_date = DateCalc($start_date, "+ 1 day");
        }
        $start_date = UnixDate($start_date, "%Y%m%d");

        my $end_time = UnixDate(
            DateCalc($start_time, " + " . $show->{duration} . "minutes"),
            "%H:%M");
        if (!$end_time)
        {
            warn "Skipping programme $pid: cannot compute end time from "
                . "duration '" . $show->{duration} . "'\n";
            next PID;
        }

        # Both times are zero-padded HH:MM, so a string comparison orders
        # them correctly.  An end time earlier than the start time means the
        # programme runs past midnight.
        my $end_date = $start_date;
        if ($end_time lt $start_time)
        {
            $end_date = UnixDate(DateCalc($start_date, "+ 1 day"), "%Y%m%d");
        }

        # Translate the guide's channel name to its XMLTV id.  \Q...\E keeps
        # any regex metacharacters in a configured name literal.  A channel
        # that matches none of the configured names is passed through as-is.
        my $channel = $show->{channel};
        foreach my $entry (@channel_map)
        {
            my ($guide_name, $xmltv_id) = @$entry;
            if ($channel =~ /\Q$guide_name\E/)
            {
                $channel = $xmltv_id;
                last;
            }
        }

        # XMLTV timestamps: YYYYMMDDhhmmss followed by the UTC offset.
        my $start = $start_date . UnixDate($start_time, "%H%M") . "00 " . $offset;
        my $stop  = $end_date . UnixDate($end_time, "%H%M") . "00 " . $offset;

        my $a_prog = {
            channel => $channel,
            start   => $start,
            stop    => $stop,
            title   => [ [ $show->{title}, undef ] ]
        };

        my $descr = $show->{descr};
        $descr =~ s/^\s+//;
        $descr =~ s/\s+$//;

        # Optional elements are only added when they have content.
        if ($show->{subtitle}) { $$a_prog{'sub-title'} = [ [ $show->{subtitle}, undef ] ]; }
        if ($descr)            { $$a_prog{desc}        = [ [ $descr, undef ] ]; }
        if ($show->{genre})    { $$a_prog{category}    = [ [ $show->{genre}, undef ] ]; }

        push @$prog_ref, $a_prog;
    }

    $day_counter++;
    $currentday = DateCalc($currentday, "+ 1 day");
}

# Collect the programme ids linked from a day's guide page.
#
# Arguments: the lines of the guide page.
# Returns:   a list of pids (digits only), in page order, duplicates kept.
sub _extract_pids
{
    my @page_lines = @_;
    my @pids;
    foreach my $line (@page_lines)
    {
        # Break each line into rough table-cell sized pieces.
        foreach my $chunk (split /\n|tr|TR|TD/, $line)
        {
            next unless $chunk =~ m{closeup/default\.asp};
            # Only digits are accepted: the pid ends up in a cache file name
            # and in the details URL, and it comes from a scraped page.
            # The greedy .+ picks the last pid= in the chunk.
            if ($chunk =~ /.+pid=(\d+)/)
            {
                push @pids, $1;
            }
        }
    }
    return @pids;
}

# Pull the programme fields out of a details page.
#
# Arguments: the lines of the details page.
# Returns:   a hash ref with the keys time, channel, title, subtitle,
#            duration, genre and descr.  Every value is a string; a field
#            that was not found is the empty string.
sub _parse_details
{
    my @page_lines = @_;

    # Keep the lines after one containing "bgcolor=#ffffff" up to (but not
    # including) one containing "bgColor=#f7f3e8".
    my $table    = "";
    my $in_table = 0;
    foreach my $line (@page_lines)
    {
        $in_table = 0 if $line =~ /bgColor=#f7f3e8/;
        $table .= $line if $in_table;
        $in_table = 1 if $line =~ /bgcolor=#ffffff/;
    }

    # Turn every tag into a line break so each text fragment gets its own
    # line; fields are then identified purely by line position.
    $table =~ s/<[^>]*>/\n/g;
    # NOTE(review): this removes the literal two-character sequence " ;".
    # It looks like a mangled "&nbsp;" pattern - confirm against the
    # author's original script.
    $table =~ s/\ \;//g;

    my @field = split /\n/, $table;
    my %show = (
        time     => $field[4],     # start time
        channel  => $field[7],     # channel name
        title    => $field[19],    # programme title
        subtitle => $field[20],    # episode title
        duration => $field[21],    # run time
        genre    => $field[26],    # genre
        descr    => $field[28],    # description
    );
    # Line 22 holds the rating; it is not used in the output.

    foreach my $key (keys %show)
    {
        $show{$key} = "" unless defined $show{$key};
    }

    $show{subtitle} =~ s/ - //g;     # drop the " - " separator
    $show{duration} =~ s/\D//g;      # keep the digits (minutes) only
    # NOTE(review): this strips every space from the genre; like the " ;"
    # pattern above it may originally have targeted "&nbsp;" - confirm.
    $show{genre} =~ s/ //g;
    # A description is only accepted if it contains at least one letter.
    $show{descr} = "" unless $show{descr} =~ /[a-zA-Z]/;

    return \%show;
}
# Assemble the XMLTV document in the form XMLTV::write_data takes:
# [ encoding, credits hash, channels hash ref, programmes array ref ].
my $data = [
    'ISO-8859-1',
    {
        'source-info-name'    => 'http://tvguide.ninemsn.com.au/',
        'generator-info-url'  => '',
        'generator-info-name' => "XMLTV - tv_grab_au NineMSN v0.1"
    },
    $chan_ref,
    # Still an array ref when no programme was collected at all.
    $prog_ref || []
];

# Write the document to the file chosen with --output (or its default).
my $outfile = $opt_output;
require IO::File;
my $fh = IO::File->new($outfile, "w")
    or die "Cannot open $outfile for writing: $!\n";
XMLTV::write_data($data, OUTPUT => $fh);

# Close explicitly so that a buffered write error is reported rather than
# lost; the guard covers the case where the handle has already been closed.
if ($fh->opened)
{
    $fh->close or die "Error closing $outfile: $!\n";
}
# Return the guide page for one day, downloading it into the cache first if
# it is not already there.
#
# Arguments:
#   $_[0] - the day as a date string; it forms part of both the page URL
#           and the cache directory name
# Returns:
#   the lines of <cache dir>/<date>/guide.html as a list, or an empty list
#   (after a warning) when the page could not be downloaded or read.
sub get_day
{
    my ($date) = @_;
    my $url = $guide_url . $date . "_" . $region . ".asp";
    my $guide_dir = $cache_dir . "/" . $date;
    my $guide_file = $guide_dir . "/guide.html";

    if (!(-e $guide_file))
    {
        mkpath($guide_dir);
        my $status = getstore($url, $guide_file);
        if (!is_success($status))
        {
            warn "Could not download $url (HTTP status $status)\n";
            # In case anything was written, do not let a later run treat it
            # as a cached copy.
            unlink $guide_file;
            return;
        }
    }

    my $guide_fh;
    if (!open($guide_fh, '<', $guide_file))
    {
        warn "Could not read $guide_file: $!\n";
        return;
    }
    my @guide_lines = <$guide_fh>;
    close($guide_fh);
    return @guide_lines;
}
# Return the details page for one programme, downloading it into the cache
# first if it is not already there.
#
# Arguments:
#   $_[0] - the day as a date string; it names the cache directory
#   $_[1] - the programme id, appended to the details URL and used as the
#           cached file's base name
# Returns:
#   the lines of <cache dir>/<date>/<program id>.html as a list, or an empty
#   list (after a warning) when the page could not be downloaded or read.
sub get_details
{
    my ($date, $program_id) = @_;
    my $url = $details_url . $program_id;
    my $guide_dir = $cache_dir . "/" . $date;
    my $details_file = $guide_dir . "/" . $program_id . ".html";

    if (!(-e $details_file))
    {
        mkpath($guide_dir);
        my $status = getstore($url, $details_file);
        if (!is_success($status))
        {
            warn "Could not download $url (HTTP status $status)\n";
            # In case anything was written, do not let a later run treat it
            # as a cached copy.
            unlink $details_file;
            return;
        }
    }

    my $details_fh;
    if (!open($details_fh, '<', $details_file))
    {
        warn "Could not read $details_file: $!\n";
        return;
    }
    my @details_lines = <$details_fh>;
    close($details_fh);
    return @details_lines;
}
More information about the mythtv-users
mailing list