[mythtv-users] mythfilldatabase + tv_grab_uk_rt

Matt Collins matt at clues.com
Wed Oct 20 11:48:15 UTC 2004


On Tue, Oct 19, 2004 at 11:40:58PM +0100, Greg wrote:
> There are some channel numbers on the radiotimes site, but I think they have
> them mixed up with the ntl ones, at least it looks that way to me.
> I wouldn't mind a copy of your scripts please.
> 
> Ta,
> Greg
> 

Well, as I had more than a few requests, I'm sending this to the list at
large. It might well be 'b0rken' from an Isaac perspective though, so
don't imagine its perfect. Instructions follow:

# Run tv_grab_uk_rt --configure to get your channels, use '?' to list unchosen ones, edit the 
# output to get the channels in the order you want them, then...
#                              
# makechan.pl tv_grab_uk_rt.xml >/tmp/test.mc  
#                                           
# (or whatever you called the tv_grab output)
#                                           
# Provided there were no horrible error messages, read test.mc to see if its sensible, and
# then 
#  
# mysql -u mythtv -p mythconverg </tmp/test.mc
#  
# and enter your mythtv password.
#  
# This will fill the d/b with channel data and mythfilldatabase will now collect program
# info - it doesnt get many icons as sadly the bleb file doesnt use XMLTV identifiers,
# but its a start. You'll need to hand enter the channel numbers for your IR Blaster
# though - Have fun.


Scripts and my xmltv file attached,

Cheers

Matt


-------------- next part --------------
#!/usr/bin/perl

eval 'exec /usr/bin/perl  -S $0 ${1+"$@"}'
    if 0; # not running under some shell
use warnings;
use strict;
use Getopt::Long;
use Date::Manip; Date_Init('TZ=+0000');
use XMLTV::Version '$Id: tv_grab_uk_rt.in,v 1.71 2004/09/27 20:33:21 epaepa Exp $ ';
use XMLTV::Config_file;
use XMLTV::Get_nice;
use XMLTV::Ask;
use XMLTV::Memoize; XMLTV::Memoize::check_argv 'get_nice';
use XMLTV::DST;
use XMLTV::Usage <<END
To configure: $0 --configure [--config-file FILE]
To grab listings: $0 [--config-file FILE] [--output FILE] [--quiet]
List channels only: $0 --list-channels [--output FILE] [--quiet]
END
  ;
$XMLTV::Get_nice::Delay = 0; # since this is intended for grabbing
my $channel_list_uri = 'http://xmltv.radiotimes.com/xmltv/channels.dat';

sub configure();

# Use Log::TraceMessages if installed.
BEGIN {
    eval { require Log::TraceMessages };
    if ($@) {
	*t = sub {};
	*d = sub { '' };
    }
    else {
	*t = \&Log::TraceMessages::t;
	*d = \&Log::TraceMessages::d;
	Log::TraceMessages::check_argv();
    }
}

# Use Term::ProgressBar if installed.
use constant Have_bar => eval { require Term::ProgressBar; 1 };

GetOptions('help'          => \ my $opt_help,
	   'configure'     => \ my $opt_configure,
	   'config-file=s' => \ my $opt_config_file,
	   'output=s'      => \ my $opt_output,
	   'share=s'       => \ my $opt_share, # also undocumented
	   'quiet'         => \ my $opt_quiet,
	   'list-channels' => \ my $opt_list_channels,
	   'days'          => undef, # ignored
	   'offset'        => undef, # ignored
	  )
  or usage(0);
if ($opt_help) {
    usage(1);
}

# share/ directory for storing channel mapping files.  This next line
# is altered by processing through tv_grab_uk_rt.PL.  But we can use
# the current directory instead of share/tv_grab_uk for development.
#
# The 'source' file tv_grab_uk_rt.in has $SHARE_DIR undef, which means
# use the current directory.  In any case the directory can be
# overridden with the --share option (useful for testing).
#
my $SHARE_DIR='/share/xmltv'; # by grab/uk_rt/tv_grab_uk_rt.PL
$SHARE_DIR = $opt_share if defined $opt_share;
my $OUR_SHARE_DIR = (defined $SHARE_DIR) ? "$SHARE_DIR/tv_grab_uk_rt" : '.';
(my $CHANNEL_NAMES_FILE = "$OUR_SHARE_DIR/channel_ids") =~ tr!/!/!s;

# Stuff for the root <tv> element.
my %tv_credits = ( # 'source-info-url'     => "todo",
		   'source-info-name'    => 'Radio Times',
		   'generator-info-name' => 'XMLTV',
		   'generator-info-url'  =>
		   'http://membled.com/work/apps/xmltv/',
		 );

# Tables to convert between Radio Times and XMLTV ids of channels.
# The way to access these is through the routines rt_to_xmltv() and
# xmltv_to_rt(), not directly.  Those will deal sensibly with a new RT
# channel that isn't mentioned in the file.
#
my (%rt_to_xmltv, %xmltv_to_rt, %extra_dn);
my $line_num = 0;
foreach (XMLTV::Config_file::read_lines($CHANNEL_NAMES_FILE, 1)) {
    ++ $line_num;
    next unless defined;
    my $where = "$CHANNEL_NAMES_FILE:$line_num";
    my @fields = split /:/;
    die "$where: wrong number of fields"
      if @fields < 2 or @fields > 3;

    my ($xmltv_id, $rt_id, $extra_dn) = @fields;
    warn "$where: RT id $rt_id seen already\n"
      if defined $rt_to_xmltv{$rt_id};
    $rt_to_xmltv{$rt_id} = $xmltv_id;
    warn "$where: XMLTV id $xmltv_id seen already\n"
      if defined $xmltv_to_rt{$xmltv_id};
    $xmltv_to_rt{$xmltv_id} = $rt_id;

    $extra_dn{$xmltv_id} = $extra_dn if defined $extra_dn;
}

say <<END
All data is the copyright of the Radio Times website
<http://www.radiotimes.com> and the use of this data is restricted to
personal use only.
END
;

# Whatever we're doing, we need the channel list.
my $bar = Term::ProgressBar->new({ name => 'finding channels',
				   count => 1 })
  if Have_bar and not $opt_quiet;
my $channel_list = get_nice $channel_list_uri;
my (%channels, %seen_rt_id, %seen_name);
foreach (split /\n/, $channel_list) {
    chomp;
    /^(\d+)\|(.+)/ or die "bad line in channel list: $_";
    my ($rt_id, $name) = ($1, $2);
    $seen_rt_id{$rt_id}++ && die "channel with RT id $rt_id seen twice";
    $seen_name{$name}++ && die "channel named '$name' seen twice";
    my $xmltv_id = $rt_to_xmltv{$rt_id};
    if (not defined $xmltv_id) {
	warn "RT id $rt_id ($name) not known in channel ids file\n";
	$xmltv_id = "C$rt_id.radiotimes.com";
    }
    my @names = ([ $name ]);
    for ($extra_dn{$xmltv_id}) { push @names, [ $_ ] if defined }
    $channels{$xmltv_id} = { id => $xmltv_id,
			     rt_id => $rt_id,
			     'display-name' => \@names };
}
update $bar if $bar;
foreach (keys %xmltv_to_rt) {
    warn "channel $_ ($xmltv_to_rt{$_}) not seen on site"
      if not exists $channels{$_};
}

my %g_args = ();
if (defined $opt_output) {
    my $fh = new IO::File ">$opt_output";
    die "cannot write to $opt_output\n" if not $fh;
    %g_args = (OUTPUT => $fh);
}

if ($opt_list_channels) {
    die "--list-channels can't be given with --configure\n"
      if $opt_configure;
    my $writer = new XMLTV::Writer(%g_args, encoding => 'ISO-8859-1');
    $writer->start(\%tv_credits);
    foreach (sort keys %channels) {
	delete $channels{$_}{rt_id};
	$writer->write_channel($channels{$_});
    }
    $writer->end;
    exit;
}

# File that stores which channels to download.
my $config_file
  = XMLTV::Config_file::filename($opt_config_file, 'tv_grab_uk_rt', $opt_quiet);

if ($opt_configure) {
    configure();
    exit;
}

sub show_chan {
	my %channels=%{ shift() };
	my %chose_ch=%{ shift() };
	
    	my %show_chan;
    	foreach my $k (sort keys %channels) {
		if (! $chose_ch{$channels{$k}->{id}}) {
			foreach my $dn (@{ $channels{$k}->{'display-name'}}) {
				foreach my $dnt (@{ $dn }) {
					$show_chan{$dnt}++;
				}
			}
		}
	}

	print(join(", ",sort(keys(%show_chan)))."\n");
}
# Ask the user which channels to download, and write $config_file.
#
# Uses global %channels hash.
#
# FIXME commonize with other grabbers.
#
sub configure() {
#    local $Log::TraceMessages::On = 1;

    XMLTV::Config_file::check_no_overwrite($config_file);

    # FIXME need to make directory
    open(CONF, ">$config_file") or die "cannot write to $config_file: $!";
    my %chose_ch;
    t 'channels: ' . d \%channels;

    # For now we just let the user pick among the 'standard' channels.
    for (;;) {

	my $in = ask(<<END
Enter the name of a channel, '?' to list, or '.' to finish selecting channels:
END
	);
	# treat EOF as same as '.' -- finish
	$in = '.' if not defined $in;
	$in =~ s/^\s+//;
	$in =~ s/\s+$//;
	# handle backspace
	$in =~ s/.\x08//g;

	if ($in eq '.') {
	    if (not keys %chose_ch) {
		say('You must choose at least one channel.');
		next;
	    }
	    last;
	} elsif ($in eq "?" ) {
		show_chan(\%channels, \%chose_ch);
		next;
	}

	# FIXME commonize this matching by display name.
	my @poss;
      CH: foreach my $k (sort keys %channels) {
	    my $ch = $channels{$k};
	    my $dns = $ch->{'display-name'};
	    unless ($dns and @$dns) {
		warn "channel with id $ch->{id} has no display name, so cannot be configured\n";
		next CH;
	    }
	    foreach (map { $_->[0] } @$dns) {
		# use substring match
		if (index(lc, lc $in) != -1) {
		    push @poss, $ch;
		    next CH;
		}
	    }
	}

	# We only matched based on display names, so we can assume
	# that each possible channel has at least one.
	#
	if (@poss == 0) {
	    say("There is no channel called '$in'.");
	}
	elsif (@poss == 1) {
	    my $ch = $poss[0];
	    if (ask_boolean('Add channel ' .
			    $ch->{'display-name'}->[0]->[0] . '?', 1)) {
		my $xmltv_id = $ch->{id};
		unless ($chose_ch{$xmltv_id}++) {
		    print CONF "channel $xmltv_id\n";
		}
	    }
	}
	elsif (1 <= @poss and @poss < 25) {
	    my %dn_to_ch;
	    foreach (@poss) {
		my $dn = $_->{'display-name'}->[0]->[0];
		warn "more than one channel called $dn"
		  if exists $dn_to_ch{$dn};
		$dn_to_ch{$dn} = $_;
	    }
	    my $none_option = 'None of the above are what I wanted';
	    die 'silly channel name' if exists $dn_to_ch{$none_option};
	    my $r = ask_choice('Which channel to add?',
			$poss[0]->{'display-name'}->[0]->[0],
			(sort keys %dn_to_ch), $none_option);
	    # treat EOF as same as none-option
	    next if not defined $r;
	    next if $r eq $none_option;
	    my $ch = $dn_to_ch{$r}; die if not defined $ch;
	    my $xmltv_id = $ch->{id};
	    unless ($chose_ch{$xmltv_id}++) {
		print CONF "channel $xmltv_id\n";
	    }
	}
	elsif (25 <= @poss) {
	    say("'$in' matches lots of channels, be more specific.");
	}
	else { die }
    }

    close CONF or warn "cannot close $config_file: $!";
    say("Finished configuration.");
    exit();
}

# Grabbing.  Start by reading config file.
my @wanted_chs;
my $n = 0;
foreach (XMLTV::Config_file::read_lines $config_file) {
    ++$n;
    next if not defined;
    /^\s*channel\s+(\S+)\s*$/ or die "$config_file: $n: bad line $_\n";
    my $id = $1;
    if (not exists $channels{$id}) {
	warn "channel $id mentioned in $config_file but not on site\n";
	next;
    }
    push @wanted_chs, $id;
}
#@wanted_chs = sort keys %channels;

my $writer = new XMLTV::Writer(%g_args, encoding => 'ISO-8859-1');
$writer->start(\%tv_credits);
foreach (@wanted_chs) {
    my %h = %{$channels{$_}};
    delete $h{rt_id};
    $writer->write_channel(\%h);
}

$bar = Term::ProgressBar->new({ name => 'grabbing', count => scalar @wanted_chs })
  if Have_bar and not $opt_quiet;
my %warned_wrong_num_fields; # give that warning once per channel file
foreach my $ch (@wanted_chs) {
    my $c = $channels{$ch};
    my $rt_id = $channels{$ch}->{rt_id}; die if not defined $rt_id;

    # Try to get the base timezone for this channel from its name.
    my $base_tz;
    if ($c->{'display-name'}->[0]->[0] =~ /\((UTC|GMT|CET)\)\s*$/) {
	$base_tz = $1;
    }
    for ($base_tz) { $_ = 'UTC' if not defined }

    my $uri = "http://xmltv.radiotimes.com/xmltv/$rt_id.dat";
    local $SIG{__DIE__} = sub { die "$uri: $_[0]" };
    local $SIG{__WARN__} = sub { warn "$uri: $_[0]" };
    my $page = get_nice $uri;
    $page =~ tr/\207\211/\347\311/; # bad characters
    foreach (split /\n/, $page) {
	my @fields = split /\~/;
	if (@fields != 23) {
	    warn "wrong number of fields in line:\n$_\n"
	      unless $warned_wrong_num_fields{$ch}++;
	    next;
	}
	foreach (@fields) { s/^\s+//; s/\s+$//; undef $_ if not length }
	my ($title, $sub_title, $episode, $year, $director, $cast,
	    $premiere, $film, $repeat, $subtitles, $widescreen,
	    $new_series, $deaf_signed, $black_and_white, $star_rating,
	    $certificate, $genre, $desc, $choice, $date, $start, $stop,
	    $duration_mins) = @fields;
	foreach ($premiere, $film, $repeat, $subtitles, $widescreen,
		 $new_series, $deaf_signed, $black_and_white, $choice) {
	    die "true/false value not defined" if not defined;
	    if ($_ eq 'true') { $_ = 1 }
	    elsif ($_ eq 'false') { $_ = 0 }
	    else { die "bad true/false value $_" }
	}

	warn "ignoring sub-title $sub_title since episode also given\n"
	  if defined $sub_title and defined $episode;
	$sub_title = $episode if defined $episode;

	warn("missing title in: $_"), next if not defined $title;
	my %p = (channel => $ch, title => [ [ $title ] ]);
	for ($sub_title) { $p{'sub-title'} = [ [ $_ ] ] if defined }
	for ($year) { $p{date} = $_ if defined }
	for ($director) { $p{credits}{director} = [ $_ ] if defined }
	if (defined $cast) {
	    my @cast;
	    if ($cast =~ tr/|//) {
		@cast = split /\|/, $cast;
		# Each bit is in the format 'part*actor' and it seems that
		# even when part is 'director' that is the name of a
		# character.
		#
		foreach (@cast) {
		    s/^.*[*]// or warn "bad bit of cast list: $_";
		}
	    }
	    else {
		@cast = split /,/, $cast;
	    }
	    foreach (@cast) { s/^\s+//; s/\s+$// }
	    $p{credits}{actor} = \@cast;
	}
	$p{premiere} = [ '' ] if $premiere;
	push @{$p{category}}, [ 'film', 'en' ] if $film;
	$p{'previously-shown'} = {} if $repeat;
	$p{subtitles} = [ { type => 'teletext' } ] if $subtitles;
	$p{video}{aspect} = '16:9' if $widescreen;
	$p{new} = 1 if $new_series;
	# $deaf_signed ignored for now
	$p{video}{colour} = 0 if $black_and_white;
	$p{'star-rating'} = [ "$star_rating/5" ] if defined $star_rating;
	$p{rating} = [ [ $certificate, 'BBFC' ] ] if defined $certificate;
	push @{$p{category}}, [ $genre, 'en' ] if defined $genre;
	$p{desc} = [ [ $desc, 'en' ] ] if defined $desc;
	# $choice ignored for now

	# Date, start and stop time.
	my ($yyyy, $mm, $dd);
	for ($date) {
	    die "missing date in $_" if not defined;
	    m!(\d\d)/(\d\d)/(\d{4})$! or die "bad date $_";
	    ($dd, $mm, $yyyy) = ($1, $2, $3);
	}
	$p{start} = utc_offset "$yyyy$mm$dd$start", $base_tz;
	$p{stop} = utc_offset "$yyyy$mm$dd$stop", $base_tz;
	if (Date_Cmp($p{start}, $p{stop}) > 0) {
	    $p{stop} = utc_offset(DateCalc("$yyyy$mm$dd$stop", '+ 1 day'),
				  $base_tz);
	}
	# Ignore $duration_mins since it may not be reliable.

	$writer->write_programme(\%p);
    }
    update $bar if $bar;
}
$writer->end;
-------------- next part --------------
channel london.bbc1.bbc.co.uk
channel london.bbc2.bbc.co.uk
channel carlton.com
channel channel4.com
channel channel5.co.uk
channel sky-one.sky.com
channel mix.skyone.co.uk
channel news-24.bbc.co.uk
channel sky-news.sky.com
channel itn.co.uk
channel e4.channel4.com
channel plus-1.e4.channel4.com
channel itv2.itv.co.uk
channel fxuk.tv
channel paramountcomedy.com
channel 2.paramountcomedy.com
channel uk.scifi.com
channel bravo.co.uk
channel plus-1.bravo.co.uk
channel uk-gold.flextech.telewest.co.uk
channel plus-1.uk-gold.flextech.telewest.co.uk
channel uk-gold-2.flextech.telewest.co.uk
channel uk.cartoonnetwork.com
channel plus-1.uk.cartoonnetwork.com
channel Toonami.cartoonnetwork.com
channel cinema.sky.com
channel 2.cinema.sky.com
channel ondigital.premier.sky.com
channel moviemax.sky.com
channel 2.premier.sky.com
channel 2.moviemax.sky.com
channel 3.premier.sky.com
channel 3.moviemax.sky.com
channel 4.premier.sky.com
channel 5.moviemax.sky.com
channel 4.moviemax.sky.com
channel discoveryeurope.com
channel kerrang.com
channel kiss-tv.kiss100.com
channel mtv.co.uk
channel base.mtv.co.uk
channel dance.mtv.co.uk
channel hits.mtv.co.uk
channel mtv2europe.com
channel men.granadatv.co.uk
channel vh1online.co.uk
channel classic.vh1online.co.uk
channel C1482.radiotimes.com
channel uk-documentary.flextech.telewest.co.uk
channel plus-1.uk-documentary.flextech.telewest.co.uk
channel plus-1.discoveryeurope.com
channel animalplanet.discoveryeurope.com
channel C1761.radiotimes.com
channel adventure-one.nationalgeographic.co.uk
channel parliament.bbc.co.uk
channel choice.bbc.co.uk
channel knowledge.bbc.co.uk
channel biography-channel.biography.com
channel bloomberg.com
channel boomerang.cartoonnetwork.com
channel europe.cnbc.com
channel europe.cnn.com
channel civilisations.discoveryeurope.com
channel C1764.radiotimes.com
channel health.discovery.com
channel home.discoveryeurope.com
channel sci-trek.discoveryeurope.com
channel wings.discoveryeurope.com
channel travel.discoveryeurope.com
channel kids.discoveryeurope.com
channel disneychannel.disney.co.uk
channel plus-1.disneychannel.disney.co.uk
channel toon.disney.co.uk
channel eentertainment.com
channel ftv.com
channel flaunt.tv
channel plus-1.foxkids.co.uk
channel foxkids.co.uk
channel thehistorychannel.co.uk
channel plus-1.thehistorychannel.co.uk
channel nationalgeographic.co.uk
channel plus-1.nationalgeographic.co.uk
channel jr.nickelodeon.co.uk
channel replay.nickelodeon.co.uk
channel nickelodeon.co.uk
channel toons.nickelodeon.co.uk
channel playhouse.disney.co.uk
channel rte-1.rte.ie
channel n2.rte.ie
channel horrorchannel.com
channel hallmarkchannelint.com
channel ukhistory.tv
channel plus-1.ukhistory.tv
channel uk-drama.flextech.telewest.co.uk
channel uk-food.flextech.telewest.co.uk
channel plus-1.uk-food.flextech.telewest.co.uk
-------------- next part --------------
A non-text attachment was scrubbed...
Name: makechan.pl
Type: application/x-perl
Size: 6576 bytes
Desc: not available
Url : http://mythtv.org/pipermail/mythtv-users/attachments/20041020/abae4fd6/makechan.bin


More information about the mythtv-users mailing list