#! /usr/bin/perl ### ------------------------------------------------------------------------- ### Program: bbc.pl: ### Author : Robin Gilks ### Date : Aug 2005 ### ### This is a MythStream parser or harvester (not sure if there's a diff). ### The idea is to get the RealPlayer historic content from the various ### radio stations that the BBC runs. Most programs are available for streaming ### for up to 7 days after broadcast. ### MythStream passes in the URL which is specified in streams.res ### or the database so we get eg ### streams.res = radio/aod/networks/radio4/audiolist.shtml ### get http://www.bbc.co.uk/radio/aod/networks/radio4/audiolist.shtml ### we parse out the lines with 'bbcplayer' in them eg. ###
  • The Archers
  • ###
  • Just a Minute
  • ###
  • Afternoon Play
  • ### We keep the tag as the name and if there is anything after the ### then we use that as the description. We then pass back with a ### handler=level2 to display the list we have eg. ### ### Afternoon Play ### aod.shtml?radio4/afternoonplay ### bbc_l2 ### ### When the user selects a show we parse the page returned to ### find the 'var AudioStream = "...." url and extract it eg. ### var AudioStream = "/radio/aod/shows/rpms/radio4/archers"; ### Append '.rpm' and get that eg. ### get http://www.bbc.co.uk/radio/aod/shows/rpms/radio4/archers.rpm ### The contents of the rpm file is the rtsp url we want which ### we return!! eg. ### ### Dummy ### rtsp://rmv8.bbc.net.uk/radio4/archers/archers_mon.ra ### ### This is thankfully played immediately :-)) ### ------------------------------------------------------------------------- use English; use XML::DOM; #------------------------------------------------------------------------------ # Init #------------------------------------------------------------------------------ &read_parse(); # get commandline parameters into @in $source = $in[0]; # source filename from command line my $doc = XML::DOM::Document->new; my $head = $doc->createXMLDecl ('1.0'); my $root = $doc->createElement('items'); sub newNode { local $name = shift; local $value = shift; local $node = $doc->createElement($name); local $text = $doc->createTextNode($value); $node->appendChild($text); return $node; } #------------------------------------------------------------------------------ # read file into $data #------------------------------------------------------------------------------ $datafile = $source; open( INFO, "<$datafile" ); # Open file for reading undef $/; $data = ; # Read all close(INFO) ; #------------------------------------------------------------------------------ # search url's in $data and place them in special format #------------------------------------------------------------------------------ $urlpat = "\\w\\.\\-\\/\\:\\?\\&\\=\\_\\~\\*\\@;,\\+\\$\\(\\)"; # get title $data =~ s/]target=\"(.*?)\"*>(.*?)<\/a/\nHYPER***$1***HYPER***$2***HYPER***$3***HYPER\n/gi; # remove $data =~ s///gi; # check this: should extract description from alt="[description]" # get FRAME src=url $data =~ s/FRAME\s+.*?src\s*=\s*"?'?([$urlpat]+)"?'?/\nHYPER***$1***HYPER***$1***HYPER***$1***HYPER\n/gi; @lines = split ( "\n", $data); foreach $line(@lines) { if (@matches = ( $line =~ m/HYPER\*\*\*(.*)\*\*\*HYPER\*\*\*(.*)\*\*\*HYPER\*\*\*(.*)\*\*\*HYPER/ ) ) { if (@matches[1] =~ m/bbcplayer/) { # remove html tags in name $name = @matches[2]; $name =~ s/<.*?>//g; $name =~ s/&/&/g; $item = $doc->createElement('item'); $root->appendChild($item); $item->appendChild( newNode('name', $name) ); $item->appendChild( newNode('url', @matches[0]) ); $item->appendChild( newNode('handler', "bbc_l2") ); } } } print $head->toString; print $root->toString; print "\n"; #-------------------------------------------------------------------------------- # get command line parameters #-------------------------------------------------------------------------------- sub read_parse { local (*in) = @_ if @_; local ($i); push(@in, @ARGV); foreach $i (0 .. $#in) { $in[$i] =~ s/\+/ /g;} return scalar(@in); }