[mythtv-users] Australian tv guide grabbers

Duncan Sargeant dunc-mythtv at rcpt.to
Fri Aug 1 15:38:22 EDT 2003


I've written a couple of perl guide scrapers to suck Australian tv guides
out of the RF ether or off the web, and I thought I would share them
here.

The first, tv_grab_austext, uses the v4l command line tools and
alevt-cap to tune to channel 7, and looks up the teletext information
to get the day's tv guide.  Pros:  no net connection required.  Cons: 
only that day's guide available, no guide for Channel 9, no program
descriptions.

tvnine builds tv guide information from ninemsn.com.au.  It's a better
script, and includes partial descriptions for shows, as given on the
site.

Both scripts bypass any 'normal' way of filling the database in mythtv
- no XML, no mythfilldatabase, no more than 100 lines of code, it
doesn't matter how you configure myth to download tvguides.  Just run
the script, whenever you like, it will pick up where it left off and
download as much future data as it can find.

An incomplete list of tasks to use these scripts:
.  manually populate the channel table
.  customise the URL in the script
.  change the mythtv database password in the script
.  change the channel page mapping in the script for austext
.  change the map of channel names to channels in tvnine

Enjoy!
,dunc
-------------- next part --------------
#!/usr/bin/perl

use Time::ParseDate;
use LWP::Simple;
use POSIX qw(strftime);
use DBI;

## Command-line switches (both optional):
##   -d  debug: print each guide URL before it is fetched
##   -r  redownload: move stored future listings aside and fetch them again
my ($opt_d, $opt_r);

# Examine every argument instead of only $ARGV[0], so the two switches can
# be combined in any order and a run without arguments never compares
# against an undefined value.
foreach my $arg (@ARGV) {
	$opt_d = 1 if $arg eq '-d';
	$opt_r = 1 if $arg eq '-r';
}

use strict;

## Station name as it appears on the guide page => local channel number.
## The number is looked up in %chanid to obtain the database chanid, so it
## has to match the channum column of the channel table.
my %chanmap = (
	'SBS'                 => 28,
	'ABC TV WA'           => 2,
	'Channel Nine Perth'  => 9,
	'Channel Seven Perth' => 7,
	'Network TEN Perth'   => 10,
);

## channum => chanid; filled from the channel table further down.
my %chanid;

## Database handle.  RaiseError makes every failing DBI call die.
my $dbh = DBI->connect(
	"DBI:mysql:database=mythconverg",
	"mythtv",
	"***CHANGEME***",
	{ RaiseError => 1 },
);

## Statement reused for every listing; start and end times are bound as
## epoch seconds and converted by the server with from_unixtime().
my $sth = $dbh->prepare(<<'SQL');
  REPLACE INTO program (chanid, starttime, endtime, title, description)
	VALUES (?, from_unixtime(?), from_unixtime(?), ?, ?)
SQL

## -r (redownload): copy every listing that starts after today's midnight
## into oldfuture, then remove those listings from program so that the
## fetch loop downloads them again.
if ($opt_r) {
	### you'll need to create an oldfuture table to use this feature.
	#
	# Format the cutoff exactly once: the copy and the delete must select
	# the same rows even when the run happens to straddle midnight.
	my $cutoff = strftime("%Y%m%d000000", localtime);

	$dbh->do("DELETE FROM oldfuture");
	$dbh->do("INSERT INTO oldfuture SELECT * FROM program WHERE starttime > ?", undef, $cutoff);
	$dbh->do("DELETE FROM program WHERE starttime > ?", undef, $cutoff);
}

## Epoch time of the first day to download: the end of the latest listing
## already stored, or the current time when nothing is stored yet.
my $startday;

{
	# Build the channum => chanid lookup from the channel table.
	my $channels = $dbh->selectall_arrayref("SELECT chanid,channum FROM channel");
	foreach my $row (@$channels) {
		my ($id, $num) = @$row;
		$chanid{$num} = $id;
	}

	my $latest = $dbh->selectall_arrayref("SELECT unix_timestamp(max(endtime)) FROM program");
	$startday = $latest->[0][0];

	# max() over an empty program table yields NULL.  Left undefined, the
	# day loop would count up from the epoch and request thousands of
	# long-gone guide pages, so start from the current time instead.
	$startday = time() unless defined $startday;
}

## Main loop: one guide page per day, from $startday up to nine days ahead
## of now.  Each page is reduced to the single line holding the listing
## table, split into rows and cells, and every usable row is written with
## the prepared REPLACE statement.
for (my $day = $startday; $day <= time() + 86400 * 9; $day += 86400) {
	my $url = strftime("http://tvguide.ninemsn.com.au/search/default.asp?region=101&day=%d%%2F%m%%2F%Y&TimeZ=n&type=fta&search=true&go.x=12&go.y=12&go=go", localtime($day));

	print "$url\n" if $opt_d;

	# get() returns undef when the download fails; report it and move on
	# to the next day instead of silently parsing nothing.
	my $page = get($url);
	unless (defined $page) {
		warn "could not fetch $url\n";
		next;
	}

	# The whole listing table sits on the line containing "We have found".
	my ($listing) = grep { /We have found/ } split(/\n/, $page);
	next unless defined $listing;

	my @rows = split(/\Q<tr valign=top>\E/i, $listing);
	shift(@rows);		# text in front of the first row

	foreach my $r (@rows) {
		my %i;

		# The original declared ($c0, $c2) yet read $c1, which does not
		# compile under "use strict".
		# NOTE(review): assumes the row has a leading cell that is ignored,
		# then time<BR>channel, then title/length/description -- confirm
		# against the live page markup.
		my (undef, $c1, $c2) = split(m{</td><td[^>]+>}, $r);
		next unless defined $c1 && defined $c2;

		($i{time}, $i{chan}) = split('<BR>', $c1);
		next unless defined $i{time} && defined $i{chan};

		# Explicit limit of 2: the remainder must stay in one piece,
		# otherwise the description is cut at the first ")" it contains.
		($i{title}, my $c3) = split('</b>', $c2, 2);
		($i{length}, $i{desc}) = split(/\)/, $c3, 2) if defined $c3;

		# Strip markup and translate the entities used by the site.
		foreach my $field (qw(chan time length title desc)) {
			next unless defined $i{$field};
			$i{$field} =~ s/<[^>]+>//g;
			$i{$field} =~ s/\Q&quot;\E/'/g;
			$i{$field} =~ s/\Q&dquot;\E/"/g;
		}

		# Running time in minutes: keep the digits only.
		$i{length} = '' unless defined $i{length};
		$i{length} =~ s/\D//g;

		# The page shows a time of day only; combine it with this page's date.
		$i{start} = parsedate("$i{time} " . strftime("%F", localtime($day)));
		unless (defined $i{start}) {
			print "skipping '$i{title}': cannot parse time '$i{time}'\n" if $opt_d;
			next;
		}
		$i{end} = $i{start} + ($i{length} || 0) * 60;

		# Station name -> channel number -> chanid.  Rows for stations
		# missing from %chanmap or from the channel table are skipped
		# rather than stored without a channel.
		my $channum = $chanmap{$i{chan}};
		$i{chanid} = defined $channum ? $chanid{$channum} : undef;
		unless (defined $i{chanid}) {
			print "skipping unmapped channel '$i{chan}'\n" if $opt_d;
			next;
		}

#		print join (" ", scalar localtime($i{start}), @i{qw(chanid title desc)}) . "\n";
		$sth->execute(@i{qw(chanid start end title desc)});
	}
}
-------------- next part --------------
#!/usr/bin/perl -w


use strict;
use DBI;
use vars qw(%ttguide @ttpages $dbh $sth %chanid $today $tomorrow);
use POSIX qw(strftime);

### Channel number => teletext page that carries that channel's guide.
### Change these to the tvguide pages for your local channels.
%ttguide = (
	28 => 651,
	2  => 655,
	7  => 663,
	10 => 669,
);

### Every guide page is requested as two sub-pages, NNN.01 and NNN.02.
@ttpages = ();
foreach my $page (values %ttguide) {
	push @ttpages, "$page.01", "$page.02";
}

### Reference instants (epoch seconds) used to date the listings.
### NOTE(review): "today" is taken two hours ahead of the current time; the
### reason for the 7200 second offset is not evident from the code -- confirm.
$today    = time() + 7200;
$tomorrow = $today + 86400;

### Connect to the mythconverg database and prepare the statement used for
### every listing.  The connection does not set RaiseError, so failures are
### checked here; otherwise a refused connection would only show up as a
### method call on an undefined value.
$dbh = DBI->connect("DBI:mysql:database=mythconverg", "mythtv", "***CHANGEME***")
	or die "cannot connect to mythconverg: $DBI::errstr\n";

### Start and end times are bound as YYYYMMDDhhmmss strings.
$sth = $dbh->prepare( <<EOF) or die "cannot prepare program insert: " . $dbh->errstr . "\n";
  REPLACE INTO program (chanid, starttime, endtime, title) VALUES (?, ?, ?, ?)
EOF

### Build the channum => chanid lookup from the channel table.
{
	# Without RaiseError a failed query returns undef; stop with the
	# database error instead of dereferencing it.
	my $ret = $dbh->selectall_arrayref("SELECT chanid,channum FROM channel")
		or die "cannot read channel table: " . $dbh->errstr . "\n";

	foreach my $row (@$ret) {
		my ($id, $num) = @$row;
		next unless defined $num;	# a NULL channum cannot be looked up
		$chanid{$num} = $id;
	}
}

### austext is on channel 7
#
# Tune the card, mute it, then capture every sub-page listed in @ttpages.
# alevt-cap presumably writes the ttext-NNN.SS.txt files that are read
# afterwards -- confirm against the alevt-cap documentation.
# A failing command is reported but does not abort the run, so whatever
# pages were captured are still processed.
foreach my $cmd (
	[ qw(/usr/bin/v4lctl setstation 7) ],
	[ qw(/usr/bin/v4lctl volume mute) ],
	[ "alevt-cap", @ttpages ],
) {
	system(@$cmd) == 0
		or warn "command failed (status $?): @$cmd\n";
}

### Turn the captured teletext pages into program rows, channel by channel.
### A listing line starts with " HH?MM  title"; lines indented by fifteen or
### more spaces continue the previous title.  A program's end time is the
### start time of the next listing, so a row is written only once the
### following listing has been seen.
foreach my $chan (keys %ttguide) {
	# Start stamp and title of the listing still waiting for its end time;
	# both carry over from sub-page 01 to sub-page 02.
	my ($start, $title);

	foreach my $sp ("01", "02") {
		my $file = "ttext-$ttguide{$chan}.$sp.txt";

		# Lexical handle, three-argument open, and an explicit check: a
		# page that was not captured is reported and skipped.
		my $tt;
		unless (open($tt, '<', $file)) {
			warn "cannot open $file: $!\n";
			next;
		}
		# Remove the file straight away; the open handle stays readable.
		unlink($file);

		while (my $line = <$tt>) {
			chomp $line;

			if ($line =~ /^ {15,}([^ ].+)$/) {
				$title .= " $1";
				next;
			}
			next unless $line =~ /^ (\d\d).(\d\d)  (.*)$/;

			# Copy the captures before any later match can overwrite them.
			my ($hh, $mm, $n) = ($1, $2, $3);

			## determine the day...
			# Morning times on the second sub-page belong to tomorrow.
			my $d = ($sp eq "02" and $hh < 12) ? $tomorrow : $today;
			my $t = strftime("%Y%m%d", localtime $d) . "$hh${mm}00";

			if ($start) {
				# eliminate whitespace
				$title =~ s/\s+/ /g;
				$title =~ s/\s$//;

				# remove rating..
				# NOTE(review): the greedy .* removes everything from the
				# first "(" onwards -- confirm that is intended for titles
				# with more than one parenthesised part.
				$title =~ s/\s?\(.*\)$//;
				$title =~ s!\s?/\w{0,3}$!!;

				$sth->execute($chanid{$chan}, $start, $t, $title);
				# print join (" ", ($chan, $chanid{$chan}, $start, $t, $title)) . "\n";
			}
			$start = $t;
			$title = $n;
		}
		close($tt);
		## can't be bothered with the last program! ... :-(
	}
}


More information about the mythtv-users mailing list