[mythtv-users] New Australian XMLTV grabber - Adelaide Mods

Michael Cheshire (Mailing Lists) michael-mlists at cheshire.id.au
Wed Nov 3 10:12:00 UTC 2004


I've modified that script for Adelaide (and done another one for Foxtel) and it seems to work for the most part, however i get missing data in some areas - script and errors attatched - any ideas from any coders out there?


#!/usr/bin/perl -w 

# Australian TV Guide XMLTV grabber by Damon Searle 
# Derived from a yahoo XMLTV grabber by Ron Kellam which was itself... 
# Derived from original code by Justin Hawkins 
# This program is free software; you can redistribute it and/or modify 
# it under the terms of the GNU General Public License as published by 
# the Free Software Foundation; either version 2 of the License, or 
# (at your option) any later version. 
# This program is distributed in the hope that it will be useful, 
# but WITHOUT ANY WARRANTY; without even the implied warranty of 
# GNU General Public License for more details. 
# You should have received a copy of the GNU General Public License 
# along with this program; if not, write to the Free Software 
# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 

# *** Only tested with ACT data *** 

# Works for the most part with Adeladie Data

# Instructions: 
# Go to http://tvguide.ninemsn.com.au/guide/ select your area 
# Look at the last number in the URL before ".asp" and set 
# the region variable below. Then put the channel names as listed 
# on the tv guide site into the variables below. 
# Then set your XMLTV ids from the database in the _XMLTVID variables. 

# If it doesn't work with mythfilldatabase, try: 
# tv_grab_au 
# mythfilldatabase 1 -1 /tmp/tvguide/guide.xml 

# Modified by Michael Cheshire to pick up data for Adelaide

use strict; 
use Getopt::Long; 
use XMLTV; 
use LWP::Simple; 
use Date::Manip; 
use File::Path; 

# Variables 

my $days_to_grab = 7; 
my $region = "81"; # 126 = ACT, 123 = Fox, 83 = Adelaide 
my $guide_url = "http://tvguide.ninemsn.com.au/guide/"; 
my $details_url = "http://tvguide.ninemsn.com.au/closeup/default.asp?pid="; 
my $cache_dir = "/tmp/tvguide.fta"; 
my $offset = "+0930"; 

my $ABC = "ABC SA"; 
my $Prime = "Channel Seven Adelaide"; #Channel 7 in Sydney/Melbourne/etc 
my $SBS = "SBS"; 
my $Ten = "Network TEN Adelaide"; 
my $WIN = "Channel Nine Adelaide"; #Channel 9 in Sydney/Melbourne/etc 

#my $ABC_XMLTVID = "freesd.Canberra.2.d1.com.au"; 
#my $Prime_XMLTVID = "freesd.Canberra.7.d1.com.au"; 
#my $SBS_XMLTVID = "freesd.Canberra.SBS.d1.com.au"; 
#my $Ten_XMLTVID = "freesd.Canberra.10.d1.com.au"; 
#my $WIN_XMLTVID = "freesd.Canberra.9.d1.com.au"; 

my $ABC_XMLTVID = "free.Adelaide.2.d1.com.au"; 
my $Prime_XMLTVID = "free.Adelaide.7.d1.com.au"; 
my $SBS_XMLTVID = "free.Adelaide.SBS.d1.com.au"; 
my $Ten_XMLTVID = "free.Adelaide.10.d1.com.au"; 
my $WIN_XMLTVID = "free.Adelaide.9.d1.com.au"; 

my $opt_days; 
my $opt_output; 

GetOptions('days=i' => \$opt_days, 
'output=s' => \$opt_output 

if ($opt_days) { 
$days_to_grab = $opt_days 

if (!($opt_output)) { 
$opt_output = $cache_dir . "/guide.xml"; 

print "$days_to_grab, $opt_output\n"; 

my $currentday = &ParseDate("today"); 
my $prog_ref; 
my $chan_ref; 

$$chan_ref{$ABC} = { 
'id' => $ABC_XMLTVID, 
'display-name' => [ [ $ABC, undef ]]}; 
$$chan_ref{$Prime} = { 
'id' => $Prime_XMLTVID, 
'display-name' => [ [ $Prime, undef ]]}; 
$$chan_ref{$SBS} = { 
'id' => $SBS_XMLTVID, 
'display-name' => [ [ $SBS, undef ]]}; 
$$chan_ref{$Ten} = { 
'id' => $Ten_XMLTVID, 
'display-name' => [ [ $Ten, undef ]]}; 
$$chan_ref{$WIN} = { 
'id' => $WIN_XMLTVID, 
'display-name' => [ [ $WIN, undef ]]}; 

my $day_counter = 1; 
while ($day_counter <= $days_to_grab) 
my $date = &UnixDate($currentday, "%d%m%Y"); 
my @day_lines = get_day($date); 
my @pids; 
foreach my $line (@day_lines) 
foreach my $link (split /\n|tr|TR|TD|tr/, $line ) 
if ($link =~ /closeup\/default.asp/) 
$link =~ s/.+pid=//g; 
$link =~ s/".+//g; 
if ($link =~ /\d+/) 
push @pids, $link; 

foreach my $pid (@pids) 
my @details = get_details($date, $pid); 

my $show_details_table = ""; 
my $use_line = 0; 
foreach my $line (@details) 
if ($line =~ /bgColor=#f7f3e8/) 
$use_line = 0; 
if ($use_line == 1) 
$show_details_table .= $line; 
if ($line =~ /bgcolor=#ffffff/) 
$use_line = 1; 


$show_details_table =~ s/<[^>]*>/\n/g; 
$show_details_table =~ s/\&nbsp\;//g; 
#$show_details_table =~ s/<BR>|<TR>|<TD><B><b><\/B><\/b>/\n/g; 
#$show_details_table =~ s/Genre://g; 
#$show_details_table =~ s/Rated:/\n/g; 
my $count = 0; 

my $channel = ""; 
my $start_date = &UnixDate($currentday, "%Y-%m-%d"); 
my $time; 
my $title1 = ""; 
my $title2 = ""; 
my $genre = ""; 
my $descr = ""; 
my $details = ""; 
my $duration; 

#print $show_details_table. "\n\n\n"; 
foreach my $line (split /\n/, $show_details_table) 
if ($count == 4){ 
#print "Time: " . $line . "\n"; 
$time = $line; 
elsif ($count == 7){ 
$channel = $line; 
#print "Channel: " . $line . "\n"; 
elsif ($count == 19){ 
$title1 = $line; 
#print "Program: " . $line . "\n"; 
elsif ($count == 20){ 
$line =~ s/ - //g; 
$title2 = $line; 
#print "Subtitle: " . $line . "\n"; 
elsif ($count == 21){ 
$line =~ s/\D//g; 
$duration = $line; 
#print "Run time: " . $line . "\n"; 
elsif ($count == 22){ 
$line =~ s/[^A-Z]//g; 
$details = $line; 
#print "Rating: " . $line . "\n"; 
elsif ($count == 26){ 
$line =~ s/ //g; 
$genre = $line; 
#print "Genre: " . $line . "\n"; 
elsif ($count == 28 && $line =~ /[a-zA-Z]/){ 
$descr = $line; 
#print "Description: " . $line . "\n"; 
#elsif ($count == 26 && $line =~ /[a-zA-Z]/){ 
# $descr = $line; 
# print "Description: " . $line . "\n"; 
#print $count .": " . $line . "\n"; 

my $start_time = &UnixDate($time, "%H:%M"); 
my $start_datetime = $start_date . " " . $start_time; 
if ($start_time =~ /00:|01:|02:|03:|04:|05:/) 
$start_date = &DateCalc($start_date, "+ 1 day"); 
$start_date = &UnixDate($start_date, "%Y%m%d"); 
my $end_time = &DateCalc($start_time, " + " . $duration . "minutes"); 
$end_time = &UnixDate($end_time, "%H:%M"); 

my $end_date; 
if (&Date_Cmp($start_time, $end_time) <= 0) 
$end_date = $start_date; 
my $err; 
my $edate = &DateCalc($start_date, "+ 1 day", \$err); 
$end_date = &UnixDate($edate, "%Y%m%d"); 

if ($channel =~ /$ABC/) 
$channel = $ABC_XMLTVID; 
elsif ($channel =~ /$Prime/) 
$channel = $Prime_XMLTVID; 
elsif ($channel =~ /$SBS/) 
$channel = $SBS_XMLTVID; 
elsif ($channel =~ /$Ten/) 
$channel = $Ten_XMLTVID; 
elsif ($channel =~ /$WIN/) 
$channel = $WIN_XMLTVID; 
my $start; 
my $stop; 

$start = $start_date . &UnixDate($start_time,"%H%M") . "00 " . $offset; 
$stop = $end_date . &UnixDate($end_time,"%H%M") . "00 +0930"; 

my $a_prog = { 
channel => $channel, 
start => $start, 
stop => $stop, 
title => [ [ $title1, undef ] ] 

$descr =~ s/^\s+//; 
$descr =~ s/\s+$//; 

if ($title2) { $$a_prog{'sub-title'} = [ [ $title2, undef ] ]; } 
if ($descr) { $$a_prog{desc} = [ [ $descr, undef ] ]; } 
if ($genre) { $$a_prog{category} = [ [ $genre, undef ] ]; } 

push @$prog_ref, $a_prog; 

$currentday = &DateCalc($currentday, "+ 1 day"); 

my $data = [ 
'source-info-name' => 'http://tvguide.ninemsn.com.au/', 
'generator-info-name' => 'NineMSN grabber', 
'generator-info-url' => '', 
'generator-info-name' => "XMLTV - tv_grab_au NineMSN v0.1" 

#my $outfile = $cache_dir . "/guide.xml"; 
my $outfile = $opt_output; 

my $fh = new IO::File ">$outfile"; 
XMLTV::write_data($data, OUTPUT=>$fh); 

# download the guide for the date to file 
sub get_day 
my $date = $_[0]; 
my $url = $guide_url . $date . "_" . $region . ".asp"; 

my $guide_dir = $cache_dir . "/" . $date; 
my $guide_file = $guide_dir . "/guide.html"; 

if (!(-e $guide_file)) 
mkpath ($guide_dir); 
getstore($url, $guide_file); 
open(GUIDE, $guide_file); 
my @guide_lines = <GUIDE>; 
return @guide_lines; 

sub get_details 
my $date = $_[0]; 
my $program_id = $_[1]; 

my $url = $details_url . $program_id; 
my $guide_dir = $cache_dir . "/" . $date; 
my $details_file = $guide_dir . "/" . $program_id . ".html"; 

if (!(-e $details_file)) 
mkpath ($guide_dir); 
getstore($url, $details_file); 

open(DETAILS, $details_file); 
my @details_lines = <DETAILS>; 
return @details_lines; 


Errors I get running the script : 

7, /tmp/tvguide.fta/guide.xml
readline() on closed filehandle DETAILS at /usr/bin/tv_guide_au_fta line 345.
Use of uninitialized value in concatenation (.) or string at /usr/bin/tv_guide_au_fta line 220.
Use of uninitialized value in pattern match (m//) at /usr/bin/tv_guide_au_fta line 221.
Use of uninitialized value in concatenation (.) or string at /usr/bin/tv_guide_au_fta line 226.
Use of uninitialized value in concatenation (.) or string at /usr/bin/tv_guide_au_fta line 264.
Use of uninitialized value in concatenation (.) or string at /usr/bin/tv_guide_au_fta line 265.
readline() on closed filehandle DETAILS at /usr/bin/tv_guide_au_fta line 345.
Use of uninitialized value in concatenation (.) or string at /usr/bin/tv_guide_au_fta line 220.
Use of uninitialized value in pattern match (m//) at /usr/bin/tv_guide_au_fta line 221.
Use of uninitialized value in concatenation (.) or string at /usr/bin/tv_guide_au_fta line 226.
Use of uninitialized value in concatenation (.) or string at /usr/bin/tv_guide_au_fta line 264.
Use of uninitialized value in concatenation (.) or string at /usr/bin/tv_guide_au_fta line 265.
title element: not writing empty content for title at /usr/share/perl5/XMLTV.pm line 1730.
bad data inside programme element, not writing
title element: not writing empty content for title at /usr/share/perl5/XMLTV.pm line 1730.
bad data inside programme element, not writing


Any ideas anyone? - I can post you the /tmp files if you require. 



-------------- next part --------------
An HTML attachment was scrubbed...
URL: http://mythtv.org/pipermail/mythtv-users/attachments/20041103/55a37d8a/attachment.htm

More information about the mythtv-users mailing list