#!/usr/bin/perl ### ------------------------------------------------------------------------- ### Program: abchealth.pl: ### Author : Phill Edwards ### Version: 1.0 Date : Jul 2006 ### ### ### This is a MythStream parser or harvester (not sure if there's a diff). ### The content it gets is ABC Australia Health Minutes videos. ### ### ### To use this MythStream parser/harvester add these lines to streams.res ### (without the ###'s!): ### [item] ### ABC Streams ### Health Minutes ### http://127.0.0.1/dummy.html?vidqual=hq ### ABC Health Minutes ### abchealth ### ### ### Params: ### MythStream passes in the URL which is specified in streams.res ### so we get eg http://127.0.0.1/dummy.html?vidqual=hq as $ARGV[1] ### ### To run or test this from the cmd line run this command: ### abchealth.pl BLAH http://127.0.0.1/dummy.html?vidqual=hq ### ------------------------------------------------------------------------- use strict; use warnings; use LWP::Simple; # Reqd for get http command. ###---------------------------------------------------------------------- ### Set ABC domain and Health Minutes URL and get the video quality ### parameter passed in on cmd line. ###---------------------------------------------------------------------- my $domain = "http://www.abc.net.au/"; # ABC domain name my $hmurl = "$domain//health/minutes/video/vod/meta/"; # Health Minutes URL my %params = get_params($ARGV[1]); my $vidqual = $params{'vidqual'}; # lq,hq=low,high bandwidth vids ###---------------------------------------------------------------------- ### Get the URL containing the Health Minutes videos. ### This contains the titles & MMS urls for the Health Minutes stories. ###---------------------------------------------------------------------- my $hdline; # Holds headline retrieved from storyN.htm my $mmsurl; # Holds MMS URL to video for headline my @hdlarr; # Array of headlines my @mmsarr; # Array of mms filenames my $ctr = 1; # Line counter my $asxfile = get($hmurl.$vidqual."1.asx"); # Download Health Minutes page if (!defined($asxfile)) { print "abchealth: ERROR - no asxfile!\n"; exit 1; } else { ### Use split to break up the file which comes down as one big long line ### using as the split point so we get separate lines for each ### story. my @lines = split(/\n/, $asxfile); ### Loop through each line, copying each line into the variable $line ### until we get the title line then extract the headline (and just the ### headline) from it. foreach my $line (@lines) { if ($line =~ //) { $hdline = $line; $hdline =~ s/^.*<title>//; $hdline =~ s/<\/title>.*$//; ### For some reason ctrl char ^M is at end of the var so get rid of it $hdline =~ s/[[:cntrl:]]//g; $hdlarr[$ctr] = $hdline; } ### Get the MMS url from the lines beginning with <ref href= if ($line =~ /<ref href/) { $mmsurl = $line; $mmsurl =~ s/^.*<ref href=\"mms/mms/; $mmsurl =~ s/"//g; $mmsurl =~ s/\/>//g; ### Get rid of any ctrl chars $mmsurl =~ s/[[:cntrl:]]//g; $mmsarr[$ctr] = $mmsurl; $ctr++; } } # end for loopeach line loop } # end main if ###---------------------------------------------------------------------- ### Print the headline and MMS filename out to a streams.res ### XML format file. ###---------------------------------------------------------------------- ### Print streams.res format XML output for MythStream. If outputting to a ### file we need to redirect STDOUT to the required filename. ### Loop through the headline and MMS arrays to extract the data to print. print "<items>\n"; for ( my $arrctr=1; $arrctr<=$#hdlarr; $arrctr++ ) { ### Output in mythstream streams.res format print "<item>\n"; print " <name>$hdlarr[$arrctr]</name>\n"; print " <url>$mmsarr[$arrctr]</url>\n"; print " <descr></descr>\n"; print " <handler></handler>\n"; print "</item>\n"; } print "</items>\n"; exit; # Exit program ### ---------------------------------------------------------------------- ### Get the cmd line param passed to it and parse the name=value pairs ### into a hash array which is returned to the calling routine. ### Should split eg vidqual=lo into a hash table with key ### 'vidqual' and "lo" as the corresponding value. ### ---------------------------------------------------------------------- sub get_params { my %parr; my ($varname, $varval, $param1); ### Remove the http part up to the GET string from parameter $param1 = $_[0]; $param1 =~ s/^http.*\?//; my @arr = split(/\&/, $param1); foreach my $pair (@arr) { ($varname, $varval) = split(/=/, $pair); $parr{$varname} = $varval; } return %parr; }