[mythtv-commits] Ticket #9074: allocine script for new metadata schema and allocine API
MythTV
mythtv at cvs.mythtv.org
Fri Oct 8 15:42:54 UTC 2010
#9074: allocine script for new metadata schema and allocine API
--------------------------------------------------------+-------------------
Reporter: Alexandra Lepercq <alexandra@…> | Owner: robertm
Type: enhancement | Status: assigned
Priority: minor | Milestone: 0.25
Component: Plugin - MythVideo | Version: Trunk Head
Severity: medium | Resolution:
Keywords: | Ticket locked: 0
--------------------------------------------------------+-------------------
Description changed by robertm:
Old description:
> Allocine has released an API for the metadata (thanks to
> http://wiki.gromez.fr/dev/api/allocine):
> http://api.allocine.fr/xml/movie?code=$movieid&partner=3
>
> I have made some modifications based on the allocine.pl script from Xavier
> Hervy to be consistent with the allocine API and the mythtv 0.24 metadata
> schema.
>
> I hope this helps
>
>
> #!/usr/bin/perl -w
>
> #
> # This perl script is intended to perform movie data lookups in french
> based on
> # the www.allocine.fr website
> #
> # For more information on MythVideo's external movie lookup mechanism,
> see
> # the README file in this directory.
> #
> # Original author: Xavier Hervy (maxpower44 AT tiscali DOT fr)
>
> # changes:
> # 20-10-2009: Geoffroy Geerseau ( http://www.soslinux.net : jamdess AT
> soslinux DOT net )
> # Modified for the new allocine templates
> # 25-10-2009: Geoffroy Geerseau ( http://www.soslinux.net : jamdess AT
> soslinux DOT net )
> # Poster download correction
> # Userrating correction
> # 28-10-2009: Robert McNamara (Myth Dev)
> # Fix issues in above patches-- files should never be downloaded to
> /tmp.
> # Convert script to output in new grabber output format for .23. Leave
> backwards compat.
> # 02-11-2009: Geoffroy Geerseau
> # Allocine have, once again, changed their templates...
> # 06-08-2010: Alexandra Lepercq
> # Allocine have, once again, changed their templates...
> # Add some data from api.allocine.fr (thanks to
> http://wiki.gromez.fr/dev/api/allocine)
> # http://api.allocine.fr/xml/movie?code=$movieid&partner=3
>
> use File::Basename;
> use File::Copy;
> use lib dirname($0);
> use Encode;
> use utf8;
> use Encode 'from_to';
> use MythTV::MythVideoCommon;
>
> use vars qw($opt_h $opt_r $opt_d $opt_i $opt_v $opt_D $opt_l $opt_M
> $opt_P $opt_originaltitle $opt_casting $opt_u_dummy);
> use Getopt::Long;
>
> $title = "Allocine Query";
> $version = "v2.06";
> $author = "Xavier Hervy";
> push(@MythTV::MythVideoCommon::URL_get_extras, ($title, $version));
>
> binmode(STDOUT, ":utf8");
>
> # display usage
> sub usage {
> print "usage: $0 -hviocMPD [parameters]\n";
> print " -h, --help help\n";
> print " -v, --version display version\n";
> print " -i, --info display info\n";
> print " -o, --originaltitle concatenate title and
> original title\n";
> print " -c, --casting with -D option, grap
> the complete actor list (much slower)\n";
> print "\n";
> print " -M <query>, --movie query> get movie list\n";
> print " -D <movieid>, --data <movieid> get movie data\n";
> print " -P <movieid>, --poster <movieid> get movie poster\n";
> exit(-1);
> }
>
> # display 1-line of info that describes the version of the program
> sub version {
> print "$title ($version) by $author\n"
> }
>
> # display 1-line of info that can describe the type of query used
> sub info {
> print "Performs queries using the www.allocine.fr website.\n";
> }
>
> # display detailed help
> sub help {
> version();
> info();
> usage();
> }
>
> # returns text within 'data' without tag
> sub removeTag {
> my ($data)=@_; # grab parameters
>
> my $ldata = lc($data);
> my $start = index($ldata, "<");
> my $finish = index($ldata, ">", $start)+1;
> while ($start != -1 && $finish != -1){
> $data = substr($data, 0, $start).substr($data, $finish,
> length($data));
> $ldata = lc($data);
> $start = index($ldata, "<");
> $finish = index($ldata, ">", $start)+1;
> }
> return $data;
> }
>
> # get Movie Data
> sub getMovieData {
> my ($movieid)=@_; # grab movieid parameter
> if (defined $opt_d) { printf("# looking for movie id: '%s'\n",
> $movieid);}
>
> # get Movie MetaData from api.allocine
> $requestAPI =
> "http://api.allocine.fr/xml/movie?code=$movieid&partner=3";
> $responseAPI = myth_url_get($requestAPI);
> from_to($responseAPI,'utf-8','iso-8859-1');
>
> # get the search results page
> my $request = "http://www.allocine.fr/film/fichefilm_gen_cfilm=" .
> $movieid . ".html";
> my $allocineurl = $request;
> if (defined $opt_d) { printf("# request: '%s'\n", $request); }
> my ($rc, $response) = myth_url_get($request);
> from_to($response,'utf-8','iso-8859-1');
>
> # parse Title and Year
> # my $title = parseBetween($response, "<title>", "</title>");
> # $title =~ s/\s*-\s*AlloCin.*//;
> # $title =~ s/(.*)\(.*$/$1/;
> # $title =~ s/^\s*(.*)\s*$/$1/;
> # my $original_title = parseBetween($response, "Titre original
> :","<br");
> # $original_title = trim(removeTag($original_title));
> # if (defined $opt_originaltitle){
> # if ($original_title ne ""){
> # $title = $title . " (" . $original_title . ")";
> # }
> # }
> # $title = removeTag($title);
> # my $year =
> parseBetween(parseBetween($response,"/film/tous/decennie","/a>"),'>','<');
> my $titleApi = parseBetween($responseAPI,"<title>","</title>");
> my $originaltitleApi =
> parseBetween($responseAPI,"<originalTitle>","</originalTitle>");
> my $yearApi =
> parseBetween($responseAPI,"<productionYear>","</productionYear>");
>
> # parse Director
> # my $tempresponse = $response;
> # my $director = parseBetween($tempresponse,"Réalisé par
> ","</a></span>");
> # $director = removeTag($director);
>
> # my $directorApi =
> parseBetween($responseAPI,"<directors>","</directors>");
>
> # parse Plot
> # my $plot = parseBetween($response,"Synopsis : </span>","</p>");
> # $plot =~ s/\n//g;
> # $plot = trim(removeTag($plot));
> my $plotApi = parseBetween($responseAPI,"<synopsis>","</synopsis>");
>
> # parse User Rating
> # my $userrating=0;
> # my $tmpratings =
> parseBetween(parseBetween($response,"/film/critiquepublic_gen_cfilm=$movieid.html\"><img",
> "</span></p></div>"),'(',')');
> # $tmpratings =~ s/,/./gm;
> # if($tmpratings =~ /^(\d+\.?\d*|\.\d+)$/ && !$tmpratings eq "")
> # {
> # $userrating = int($tmpratings*2.5);
> # }
> # else
> # {
> # $userrating = "";
> # }
> my $userratingOrig =
> parseBetween($responseAPI,"<userRating>","</userRating>");
> $userratingApi = int($userratingOrig * 2.5);
>
> # parse Rating
> my $movierating = parseBetween($response,"Interdit aux moins de
> ","ans");
> if (!($movierating eq ""))
> { $movierating = "Interdit -" . $movierating . "ans";}
> else
> {
> $movierating = parseBetween($response,"Visible
> ","enfants");
> if (!($movierating eq "")){ $movierating = "Enfants";};
> }
>
> my $movieratingTout =
> parseBetween($responseAPI,"<ratingStats>","</ratingStats>");
>
> # parse Movie length
> # my $runtime = trim(parseBetween($response,"Durée :","min"));
> # my $heure;
> # my $minutes;
> # ($heure,$minutes)=($runtime=~/[^\d]*(\d+)[^\d]*(\d*)/);
> # if (!$heure){ $heure = 0; }
> # if (!$minutes){
> # $runtime = $heure * 60;
> # }else{
> # $runtime = $heure * 60 + $minutes;
> # }
> my $runtimeOrig = parseBetween($responseAPI,"<runtime>","</runtime>");
> $runtimeApi = $runtimeOrig / 60;
>
> # parse Cast
> # my $castchunk;
> # $castchunk = parseBetween($response, " Avec ","<a
> href=\"/film/casting_gen_cfilm=$movieid.html\" >plus</a>");
> # my $cast = "";
> # $cast = trim(join(',', removeTag($castchunk)));
> my $castApi = parseBetween($responseAPI,"<casting>","</casting>");
> $castApi =~ s!<castMember>!\n!g;
> $castApi =~ s!</castMember>!/>!g;
> $castApi =~ s/person code/person name/g;
> $castApi =~ s!</person>!"!g;
> $castApi =~ s/<activity code/ job/g;
> $castApi =~ s!</activity>!"!g;
> $castApi =~ s/<role>/ character="/g;
> $castApi =~ s!</role>!"!g;
> $castApi =~ s![0-9]!!g;
> $castApi =~ s!">!!g;
> $castApi =~ s/<picture href/ picture/g;
> $castApi =~ s!</picture>!"!g;
> $castApi =~
> s!picture="http://images.allocine.fr/medias/nmedia/////.jpg"!!g;
> $castApi =~ s!Réalisateur!director!g;
> $castApi =~ s!Acteur!actor!g;
> $castApi =~ s!Producteur!producer!g;
> $castApi =~ s!Compositeur!composer!g;
>
> #Genres
> # my $genres = parseBetween($response,"Genre :","<br");
> # $genres =~ s/\s*\n*(.*)\s*$/ $1/;
> # $genres = trim(removeTag($genres));
> # $genres =~ s/\s*\n*(.*)\s*$/ $1/;
> my $genreApi =
> parseBetween($responseAPI,"<genreList>","</genreList>");
> $genreApi =~ s/genre code/category name/g;
> $genreApi =~ s!</genre>!"/>\n!g;
> $genreApi =~ s![0-9]!!g;
> $genreApi =~ s!">!!g;
> # $genreApi =~ s!Musical!Comédie musicale!g;
> # $genreApi =~ s!Action!Aventure, Action!g;
> # $genreApi =~ s!Aventure!!g;
>
> #Countries
> # my $countries = parseBetween($response,"Long-métrage",".");
> # $countries = trim(removeTag($countries));
> # $countries =~ s/\s*(.*)\s*$/ $1/;
> # $countries = trim($countries);
> # $countries =~ s/\n//gm;
> # $countries =~ s/\s//gm;
> # $countries =~ s/,/, /gm;
> # if ($countries eq "allemand") { $countries = "Allemagne"; }
> # if ($countries eq "américain") { $countries = "États-Unis"; }
> # if ($countries eq "autrichien") { $countries = "Autriche"; }
> # if ($countries eq "britannique") { $countries = "Royaume Uni"; }
> # if ($countries eq "canadien") { $countries = "Canada"; }
> # if ($countries eq "français") { $countries = "France"; }
> # if ($countries eq "italien") { $countries = "Italie"; }
> # if ($countries eq "russe") { $countries = "Russie"; }
> my $countryApi =
> parseBetween($responseAPI,"<nationalityList>","</nationalityList>");
> $countryApi =~ s/nationality code/country name/g;
> $countryApi =~ s!</nationality>!"/>\n!g;
> $countryApi =~ s![0-9]!!g;
> $countryApi =~ s!">!!g;
> # $countryApi =~ s!Grande-Bretagne!Royaume Uni!g;
> # $countryApi =~ s!U.S.A.!États-Unis!g;
>
> # parse for Coverart
> # my $mediafile = parseBetween($response,"<a
> href=\"/film/fichefilm-".$movieid."/affiches/detail/?cmediafile=","\"
> >");
> # $covrequest =
> "http://www.allocine.fr/film/fichefilm-".$movieid."/affiches/detail/?cmediafile=".$mediafile;
> # ($rc, $covresponse) = myth_url_get($covrequest);
> # my $uri = parseBetween(parseBetween($covresponse,"<div class=\"tac\"
> style=\"\">","</div>"),"<img src=\"","\" alt");
> $request =
> "http://www.allocine.fr/film/fichefilm-".$movieid."/affiches/";
> ($rc, $response) = myth_url_get($request);
> my $mediafile = parseBetween($response,"<a
> href=\"/film/fichefilm-".$movieid."/affiches/detail/?cmediafile=","\"
> >");
> $request2 =
> "http://www.allocine.fr/film/fichefilm-".$movieid."/affiches/detail/?cmediafile=".$mediafile;
> ($rc, $response2) = myth_url_get($request2);
> $uri = trim(parseBetween($response2,"<a Target=\"_blank\"
> Class=\"fs11\" href=\"","\">Agrandir</a>"));
> if ($uri eq "")
> {
> $request =
> "http://www.allocine.fr/film/fichefilm-".$movieid."/affiches/";
> ($rc, $response) = myth_url_get($request);
> my $tmp_uri = parseBetween($response, "<a
> href=\"/film/fichefilm-".$movieid."/affiches/\">"," alt=");
> $tmp_uri =~ s/\n/ /gm;
> $uri = trim(parseBetween($tmp_uri,"<img src='h","'"));
> if($uri ne "")
> {
> $uri = "h$uri";
> }
> }
> # if no picture was found, just download the empty poster
> if($uri eq ""){
> $uri =
> "http://images.allocine.fr/r_160_214/commons/emptymedia/AffichetteAllocine.gif";
> }
>
> # output fields (these field names must match what MythVideo is
> looking for)
> # print "Title:$title\n";
> # if (!(defined $opt_originaltitle)){
> # print "OriginalTitle:$original_title\n";
> # }
> # print "URL:$allocineurl\n";
> # print "Year:$year\n";
> # print "Director:$director\n";
> # print "Plot:$plot\n";
> # print "UserRating:$userrating\n";
> # print "MovieRating:$movierating\n";
> # print "Runtime:$runtime\n";
> # print "Cast:$cast\n";
> # print "Genres:$genres\n";
> # print "Countries:$countries\n";
> # print "Coverart: $uri\n";
>
> # print "\n";
> # print "OriginaltitleApi:$originaltitleApi\n";
> # print "MovieratingTout:$movieratingTout\n";
> # print "\n";
>
>
> # MetaData output
> print "<?xml version='1.0' encoding='UTF-8'?>\n";
> print "<metadata>\n";
> print "<item>\n";
> print "<inetref>$movieid</inetref>\n";
> print "<title>$titleApi</title>\n";
> print "<language>fr</language>\n";
> print "<description>$plotApi</description>\n";
> print "<countries>\n";
> # print "<country name=\"$countries\"/>\n";
> print "$countryApi";
> print "</countries>\n";
> print "<categories>\n";
> # print "<category name=\"$genres\"/>\n";
> print "$genreApi";
> print "</categories>\n";
> print "<userrating>$userratingApi</userrating>\n";
> # print "<movierating>$movierating</movierating>\n";
> print "<year>$yearApi</year>\n";
> print "<runtime>$runtimeApi</runtime>\n";
> print "<homepage>$allocineurl</homepage>\n";
> # print "<trailerURL>$bandeannonceurl</trailerURL>\n";
> print "<people>";
> # print "<person name=\"$director\" job=\"Director\"/>\n";
> # print "<person name=\"$cast\" job=\"Actor\"/>\n";
> print "$castApi";
> print "</people>\n";
> print "<images>\n";
> print "<image type=\"coverart\" url=\"$uri\"/>\n";
> # print "<image type=\"fanart\" url=\"$fanarturi\"/>\n";
> # print "<image type=\"screenshot\" url=\"$screenshoturi\"/>\n";
> # print "<image type=\"banner\" url=\"$banneruri\"/>\n";
> print "</images>\n";
> print "</item>\n";
> print "</metadata>\n";
>
>
> }
>
> # dump Movie Poster
> sub getMoviePoster {
> my ($movieid)=@_; # grab movieid parameter
> if (defined $opt_d) { printf("# looking for movie id: '%s'\n",
> $movieid);}
>
> # get the search results page
>
> my $request =
> "http://www.allocine.fr/film/fichefilm-".$movieid."/affiches/";
> if (defined $opt_d) { printf("# request: '%s'\n", $request); }
> my ($rc, $response) = myth_url_get($request);
> my $mediafile = parseBetween($response,"<a
> href=\"/film/fichefilm-".$movieid."/affiches/detail/?cmediafile=","\"
> >");
>
> $request =
> "http://www.allocine.fr/film/fichefilm-".$movieid."/affiches/detail/?cmediafile=".$mediafile;
> ($rc, $response) = myth_url_get($request);
> my $uri = parseBetween(parseBetween($response,"<div class=\"tac\"
> style=\"\">","</div>"),"<img src=\"","\" alt");
> if ($uri eq "")
> {
> $request =
> "http://www.allocine.fr/film/fichefilm-".$movieid."/affiches/";
> ($rc, $response) = myth_url_get($request);
> my $tmp_uri = parseBetween($response, "<a
> href=\"/film/fichefilm-".$movieid."/affiches/\">"," alt=");
> $tmp_uri =~ s/\n/ /gm;
> $uri = trim(parseBetween($tmp_uri,"<img src='h","'"));
> if($uri ne "")
> {
> $uri = "h$uri";
> }
> print "$uri\n";
> }
>
> # if no picture was found, just download the empty poster
> if($uri eq ""){
> $uri =
> "http://images.allocine.fr/r_160_214/commons/emptymedia/AffichetteAllocine.gif";
> }
>
> print "$uri\n";
> }
>
> sub getMovieList {
> my ($filename, $options) = @_; # grab parameters
>
> my $query = cleanTitleQuery($filename);
> if (!$options) { $options = ""; }
> if (defined $opt_d) {
> printf("# query: '%s', options: '%s'\n", $query,
> $options);
> }
>
> # get the search results page
> my $request = "http://www.allocine.fr/recherche/1/?q=$query";
> if (defined $opt_d) { printf("# request: '%s'\n", $request); }
> my ($rc, $response) = myth_url_get($request);
> from_to($response,'utf-8','iso-8859-1');
> $response =~ s/\n//g;
> # extract possible matches
> # possible matches are grouped in several categories:
> # exact, partial, and approximate
> my $exact_matches = $response;
> # parse movie list from matches
> my $beg = "<div style=\"margin-top:-5px;\">";
> my $end = "<span class=\"fs11\">";
>
> my @movies;
>
> my $data = $exact_matches;
> if ($data eq "") {
> if (defined $opt_d) { printf("# no results\n"); }
> } else {
> my $start = index($data, $beg);
> my $finish = index($data, $end, $start);
>
> my $title;
> my $movienum;
> my $moviename;
> while ($start != -1) {
> $start += length($beg);
> my $sub1 = substr($data, $start, $finish -
> $start);
> $sub1 =~ s/(.*)\(.*$/$1/;
> $moviename = trim(removeTag($sub1));
> $movienum = parseBetween($sub1,"<a
> href='/film/fichefilm_gen_cfilm=",".html");
>
> $title = removeTag($moviename);
> $moviename = removeTag($moviename);
> my ($movieyear)= $moviename =~/\((\d+)\)/;
> if ($movieyear) {
> $title = $title." (".$movieyear.")";
> }
> $moviename=$title ;
>
> # advance data to next movie
> $data = substr($data, - (length($data) -
> $finish));
> $start = index($data, $beg);
> $finish = index($data, $end, $start);
>
> # add to array
> push(@movies, "$movienum:$moviename");
> }
>
> # display array of values
> for $movie (@movies) {
> print "$movie\n";
> }
> }
> }
>
> #
> # Main Program
> #
>
> # parse command line arguments
>
> GetOptions( "utf8" => \$opt_u_dummy,
> "version" => \$opt_v,
> "info" => \$opt_i,
> "language" => \$opt_l,
> "originaltitle" => \$opt_originaltitle,
> "casting" => \$opt_casting,
> "Data" => \$opt_D,
> "Movie" => \$opt_M,
> "Poster" => \$opt_P
> );
>
> # print out info
> if (defined $opt_v) { version(); exit 1; }
> if (defined $opt_i) { info(); exit 1; }
> if (defined $opt_l) {
> my $lang = shift;
> }
>
> # print out usage if needed
> if (defined $opt_h || $#ARGV<0) { help(); }
>
> if (defined $opt_D) {
> # take movieid from cmdline arg
> $movieid = shift || die "Usage : $0 -D <movieid>\n";
> getMovieData($movieid);
> }
>
> elsif (defined $opt_P) {
> # take movieid from cmdline arg
> $movieid = shift || die "Usage : $0 -P <movieid>\n";
> getMoviePoster($movieid);
> }
>
> elsif (defined $opt_M) {
> # take query from cmdline arg
> #$options = shift || die "Usage : $0 -M <query>\n";
> my $query;
> my $options = '';
> foreach $key (0 .. $#ARGV) {
> $query .= $ARGV[$key]. ' ';
> }
> getMovieList($query, $options);
> }
> # vim: set expandtab ts=3 sw=3 :
New description:
Allocine has released an API for the metadata (thanks to
http://wiki.gromez.fr/dev/api/allocine):
http://api.allocine.fr/xml/movie?code=$movieid&partner=3
I have made some modifications based on the allocine.pl script from Xavier
Hervy to be consistent with the allocine API and the mythtv 0.24 metadata
schema.
I hope this helps
--
--
Ticket URL: <http://svn.mythtv.org/trac/ticket/9074#comment:2>
MythTV <http://www.mythtv.org/>
MythTV Media Center
More information about the mythtv-commits
mailing list