[mythtv-commits] Ticket #9074: allocine script for new metadata schema and allocine API

MythTV mythtv at cvs.mythtv.org
Fri Oct 8 15:42:54 UTC 2010


#9074: allocine script for new metadata schema and allocine API
--------------------------------------------------------+-------------------
 Reporter:  Alexandra Lepercq <alexandra@…>             |           Owner:  robertm   
     Type:  enhancement                                 |          Status:  assigned  
 Priority:  minor                                       |       Milestone:  0.25      
Component:  Plugin - MythVideo                          |         Version:  Trunk Head
 Severity:  medium                                      |      Resolution:            
 Keywords:                                              |   Ticket locked:  0         
--------------------------------------------------------+-------------------
Description changed by robertm:

Old description:

> Allocine has released an API for the metadata (thanks to
> http://wiki.gromez.fr/dev/api/allocine):
> http://api.allocine.fr/xml/movie?code=$movieid&partner=3
>
> I have made some modifications based on the allocine.pl script from Xavier
> Hervy to be consistent with the allocine API and the mythtv 0.24 metadata
> schema
>
> I hope this helps
>

>
> #!/usr/bin/perl -w
>
> #
> # This perl script is intended to perform movie data lookups in french
> based on
> # the www.allocine.fr website
> #
> # For more information on MythVideo's external movie lookup mechanism,
> see
> # the README file in this directory.
> #
> # Original author: Xavier Hervy (maxpower44 AT tiscali DOT fr)
>
> # changes:
> #   20-10-2009: Geoffroy Geerseau ( http://www.soslinux.net : jamdess AT
> soslinux DOT net )
> #   Modified for the new allocine templates
> #   25-10-2009: Geoffroy Geerseau ( http://www.soslinux.net : jamdess AT
> soslinux DOT net )
> #   Poster download correction
> #   Userrating correction
> #   28-10-2009: Robert McNamara (Myth Dev)
> #   Fix issues in above patches-- files should never be downloaded to
> /tmp.
> #   Convert script to output in new grabber output format for .23.  Leave
> backwards compat.
> #   02-11-2009: Geoffroy Geerseau
> #   Allocine have, once again, changed their templates...
> #   06-08-2010: Alexandra Lepercq
> #   Allocine have, once again, changed their templates...
> #   Add some data from api.allocine.fr (thanks to
> http://wiki.gromez.fr/dev/api/allocine)
> #       http://api.allocine.fr/xml/movie?code=$movieid&partner=3
>
> use File::Basename;
> use File::Copy;
> use lib dirname($0);
> use Encode;
> use utf8;
> use Encode 'from_to';
> use MythTV::MythVideoCommon;
>
> use vars qw($opt_h $opt_r $opt_d $opt_i $opt_v $opt_D $opt_l $opt_M
> $opt_P $opt_originaltitle $opt_casting $opt_u_dummy);
> use Getopt::Long;
>
> $title = "Allocine Query";
> $version = "v2.06";
> $author = "Xavier Hervy";
> push(@MythTV::MythVideoCommon::URL_get_extras, ($title, $version));
>
> binmode(STDOUT, ":utf8");
>
> # display usage
> sub usage {
>    print "usage: $0 -hviocMPD [parameters]\n";
>    print "       -h, --help                       help\n";
>    print "       -v, --version                    display version\n";
>    print "       -i, --info                       display info\n";
>    print "       -o, --originaltitle              concatenate title and
> original title\n";
>    print "       -c, --casting                    with -D option, grap
> the complete actor list (much slower)\n";
>    print "\n";
>    print "       -M <query>,   --movie query>     get movie list\n";
>    print "       -D <movieid>, --data <movieid>   get movie data\n";
>    print "       -P <movieid>, --poster <movieid> get movie poster\n";
>    exit(-1);
> }
>
> # display 1-line of info that describes the version of the program
> sub version {
>    print "$title ($version) by $author\n"
> }
>
> # display 1-line of info that can describe the type of query used
> sub info {
>    print "Performs queries using the www.allocine.fr website.\n";
> }
>
> # display detailed help
> sub help {
>    version();
>    info();
>    usage();
> }
>
> # returns text within 'data' without tag
> sub removeTag {
>    my ($data)=@_; # grab parameters
>
>    my $ldata = lc($data);
>    my $start = index($ldata, "<");
>    my $finish = index($ldata, ">", $start)+1;
>    while ($start != -1 && $finish != -1){
>       $data = substr($data, 0, $start).substr($data, $finish,
> length($data));
>       $ldata = lc($data);
>       $start = index($ldata, "<");
>       $finish = index($ldata, ">", $start)+1;
>    }
>    return $data;
> }
>

> # get Movie Data
> sub getMovieData {
>    my ($movieid)=@_; # grab movieid parameter
>    if (defined $opt_d) { printf("# looking for movie id: '%s'\n",
> $movieid);}
>
>    # get Movie MetaData from api.allocine
>    $requestAPI =
> "http://api.allocine.fr/xml/movie?code=$movieid&partner=3";
>    $responseAPI = myth_url_get($requestAPI);
>    from_to($responseAPI,'utf-8','iso-8859-1');
>

>    # get the search results  page
>    my $request = "http://www.allocine.fr/film/fichefilm_gen_cfilm=" .
> $movieid . ".html";
>    my $allocineurl = $request;
>    if (defined $opt_d) { printf("# request: '%s'\n", $request); }
>    my ($rc, $response) = myth_url_get($request);
>    from_to($response,'utf-8','iso-8859-1');
>

>    # parse Title and Year
> #   my $title = parseBetween($response, "<title>", "</title>");
> #   $title =~ s/\s*-\s*AlloCin.*//;
> #   $title =~ s/(.*)\(.*$/$1/;
> #   $title =~ s/^\s*(.*)\s*$/$1/;
> #   my $original_title = parseBetween($response, "Titre original
> :","<br");
> #   $original_title = trim(removeTag($original_title));
> #   if (defined $opt_originaltitle){
> #      if ($original_title ne  ""){
> #        $title = $title . " (" . $original_title . ")";
> #      }
> #   }
> #   $title = removeTag($title);
> #   my $year =
> parseBetween(parseBetween($response,"/film/tous/decennie","/a>"),'>','<');
>    my $titleApi = parseBetween($responseAPI,"<title>","</title>");
>    my $originaltitleApi =
> parseBetween($responseAPI,"<originalTitle>","</originalTitle>");
>    my $yearApi =
> parseBetween($responseAPI,"<productionYear>","</productionYear>");
>

>    # parse Director
> #   my $tempresponse = $response;
> #   my $director = parseBetween($tempresponse,"Réalisé par
> ","</a></span>");
> #   $director = removeTag($director);
>
> #   my $directorApi =
> parseBetween($responseAPI,"<directors>","</directors>");
>

>    # parse Plot
> #   my $plot = parseBetween($response,"Synopsis : </span>","</p>");
> #   $plot =~ s/\n//g;
> #   $plot = trim(removeTag($plot));
>    my $plotApi = parseBetween($responseAPI,"<synopsis>","</synopsis>");
>

>    # parse User Rating
> #   my $userrating=0;
> #   my $tmpratings =
> parseBetween(parseBetween($response,"/film/critiquepublic_gen_cfilm=$movieid.html\"><img",
> "</span></p></div>"),'(',')');
> #   $tmpratings =~ s/,/./gm;
> #   if($tmpratings =~ /^(\d+\.?\d*|\.\d+)$/ && !$tmpratings eq "")
> #   {
> #       $userrating = int($tmpratings*2.5);
> #   }
> #   else
> #   {
> #       $userrating =  "";
> #   }
>    my $userratingOrig =
> parseBetween($responseAPI,"<userRating>","</userRating>");
>    $userratingApi = int($userratingOrig * 2.5);
>

>    # parse Rating
>    my $movierating = parseBetween($response,"Interdit aux moins de
> ","ans");
>    if (!($movierating eq ""))
>         { $movierating = "Interdit -" . $movierating . "ans";}
>    else
>         {
>                 $movierating = parseBetween($response,"Visible
> ","enfants");
>                 if (!($movierating eq "")){ $movierating = "Enfants";};
>         }
>
>    my $movieratingTout =
> parseBetween($responseAPI,"<ratingStats>","</ratingStats>");
>

>    # parse Movie length
> #   my $runtime = trim(parseBetween($response,"Durée :","min"));
> #   my $heure;
> #   my $minutes;
> #   ($heure,$minutes)=($runtime=~/[^\d]*(\d+)[^\d]*(\d*)/);
> #   if (!$heure){ $heure = 0; }
> #   if (!$minutes){
> #      $runtime = $heure * 60;
> #   }else{
> #       $runtime = $heure * 60 + $minutes;
> #   }
>    my $runtimeOrig = parseBetween($responseAPI,"<runtime>","</runtime>");
>    $runtimeApi = $runtimeOrig / 60;
>

>    # parse Cast
> #   my $castchunk;
> #   $castchunk = parseBetween($response, "      Avec ","<a
> href=\"/film/casting_gen_cfilm=$movieid.html\" >plus</a>");
> #   my $cast = "";
> #   $cast = trim(join(',', removeTag($castchunk)));
>    my $castApi = parseBetween($responseAPI,"<casting>","</casting>");
>    $castApi =~ s!<castMember>!\n!g;
>    $castApi =~ s!</castMember>!/>!g;
>    $castApi =~ s/person code/person name/g;
>    $castApi =~ s!</person>!"!g;
>    $castApi =~ s/<activity code/ job/g;
>    $castApi =~ s!</activity>!"!g;
>    $castApi =~ s/<role>/ character="/g;
>    $castApi =~ s!</role>!"!g;
>    $castApi =~ s![0-9]!!g;
>    $castApi =~ s!">!!g;
>    $castApi =~ s/<picture href/ picture/g;
>    $castApi =~ s!</picture>!"!g;
>    $castApi =~
> s!picture="http://images.allocine.fr/medias/nmedia/////.jpg"!!g;
>    $castApi =~ s!Réalisateur!director!g;
>    $castApi =~ s!Acteur!actor!g;
>    $castApi =~ s!Producteur!producer!g;
>    $castApi =~ s!Compositeur!composer!g;
>

>    #Genres
> #   my $genres = parseBetween($response,"Genre :","<br");
> #   $genres =~ s/\s*\n*(.*)\s*$/ $1/;
> #   $genres = trim(removeTag($genres));
> #   $genres =~ s/\s*\n*(.*)\s*$/ $1/;
>    my $genreApi =
> parseBetween($responseAPI,"<genreList>","</genreList>");
>    $genreApi =~ s/genre code/category name/g;
>    $genreApi =~ s!</genre>!"/>\n!g;
>    $genreApi =~ s![0-9]!!g;
>    $genreApi =~ s!">!!g;
> #   $genreApi =~ s!Musical!Comédie musicale!g;
> #   $genreApi =~ s!Action!Aventure, Action!g;
> #   $genreApi =~ s!Aventure!!g;
>

>    #Countries
> #   my $countries = parseBetween($response,"Long-métrage",".");
> #   $countries = trim(removeTag($countries));
> #   $countries =~ s/\s*(.*)\s*$/ $1/;
> #   $countries = trim($countries);
> #   $countries =~ s/\n//gm;
> #   $countries =~ s/\s//gm;
> #   $countries =~ s/,/, /gm;
> #   if ($countries eq "allemand") { $countries = "Allemagne"; }
> #   if ($countries eq "américain") { $countries = "États-Unis"; }
> #   if ($countries eq "autrichien") { $countries = "Autriche"; }
> #   if ($countries eq "britannique") { $countries = "Royaume Uni"; }
> #   if ($countries eq "canadien") { $countries = "Canada"; }
> #   if ($countries eq "français") { $countries = "France"; }
> #   if ($countries eq "italien") { $countries = "Italie"; }
> #   if ($countries eq "russe") { $countries = "Russie"; }
>    my $countryApi =
> parseBetween($responseAPI,"<nationalityList>","</nationalityList>");
>    $countryApi =~ s/nationality code/country name/g;
>    $countryApi =~ s!</nationality>!"/>\n!g;
>    $countryApi =~ s![0-9]!!g;
>    $countryApi =~ s!">!!g;
> #   $countryApi =~ s!Grande-Bretagne!Royaume Uni!g;
> #   $countryApi =~ s!U.S.A.!États-Unis!g;
>

>    # parse for Coverart
> #   my $mediafile = parseBetween($response,"<a
> href=\"/film/fichefilm-".$movieid."/affiches/detail/?cmediafile=","\"
> >");
> #   $covrequest =
> "http://www.allocine.fr/film/fichefilm-".$movieid."/affiches/detail/?cmediafile=".$mediafile;
> #   ($rc, $covresponse) = myth_url_get($covrequest);
> #   my $uri = parseBetween(parseBetween($covresponse,"<div class=\"tac\"
> style=\"\">","</div>"),"<img src=\"","\" alt");
>    $request =
> "http://www.allocine.fr/film/fichefilm-".$movieid."/affiches/";
>    ($rc, $response) = myth_url_get($request);
>    my $mediafile = parseBetween($response,"<a
> href=\"/film/fichefilm-".$movieid."/affiches/detail/?cmediafile=","\"
> >");
>    $request2 =
> "http://www.allocine.fr/film/fichefilm-".$movieid."/affiches/detail/?cmediafile=".$mediafile;
>    ($rc, $response2) = myth_url_get($request2);
>    $uri = trim(parseBetween($response2,"<a Target=\"_blank\"
> Class=\"fs11\" href=\"","\">Agrandir</a>"));
>    if ($uri eq "")
>    {
>         $request =
> "http://www.allocine.fr/film/fichefilm-".$movieid."/affiches/";
>         ($rc, $response) = myth_url_get($request);
>         my $tmp_uri = parseBetween($response, "<a
> href=\"/film/fichefilm-".$movieid."/affiches/\">"," alt=");
>         $tmp_uri =~ s/\n/ /gm;
>         $uri = trim(parseBetween($tmp_uri,"<img src='h","'"));
>         if($uri ne "")
>         {
>                 $uri = "h$uri";
>         }
>    }
>    # if no picture was found, just download the empty poster
>    if($uri eq ""){
>         $uri =
> "http://images.allocine.fr/r_160_214/commons/emptymedia/AffichetteAllocine.gif";
>    }
>

>    # output fields (these field names must match what MythVideo is
> looking for)
> #   print "Title:$title\n";
> #   if (!(defined $opt_originaltitle)){
> #    print "OriginalTitle:$original_title\n";
> #   }
> #   print "URL:$allocineurl\n";
> #   print "Year:$year\n";
> #   print "Director:$director\n";
> #   print "Plot:$plot\n";
> #   print "UserRating:$userrating\n";
> #   print "MovieRating:$movierating\n";
> #   print "Runtime:$runtime\n";
> #   print "Cast:$cast\n";
> #   print "Genres:$genres\n";
> #   print "Countries:$countries\n";
> #   print "Coverart: $uri\n";
>
> #   print "\n";
> #   print "OriginaltitleApi:$originaltitleApi\n";
> #   print "MovieratingTout:$movieratingTout\n";
> #   print "\n";
>

>
>    # MetaData output
> print "<?xml version='1.0' encoding='UTF-8'?>\n";
> print "<metadata>\n";
>   print "<item>\n";
>     print "<inetref>$movieid</inetref>\n";
>     print "<title>$titleApi</title>\n";
>     print "<language>fr</language>\n";
>     print "<description>$plotApi</description>\n";
>     print "<countries>\n";
> #      print "<country name=\"$countries\"/>\n";
>       print "$countryApi";
>     print "</countries>\n";
>     print "<categories>\n";
> #      print "<category name=\"$genres\"/>\n";
>       print "$genreApi";
>     print "</categories>\n";
>     print "<userrating>$userratingApi</userrating>\n";
> #    print "<movierating>$movierating</movierating>\n";
>     print "<year>$yearApi</year>\n";
>     print "<runtime>$runtimeApi</runtime>\n";
>     print "<homepage>$allocineurl</homepage>\n";
> #    print "<trailerURL>$bandeannonceurl</trailerURL>\n";
>     print "<people>";
> #      print "<person name=\"$director\" job=\"Director\"/>\n";
> #      print "<person name=\"$cast\" job=\"Actor\"/>\n";
>       print "$castApi";
>     print "</people>\n";
>     print "<images>\n";
>       print "<image type=\"coverart\" url=\"$uri\"/>\n";
> #      print "<image type=\"fanart\" url=\"$fanarturi\"/>\n";
> #      print "<image type=\"screenshot\" url=\"$screenshoturi\"/>\n";
> #      print "<image type=\"banner\" url=\"$banneruri\"/>\n";
>     print "</images>\n";
>   print "</item>\n";
> print "</metadata>\n";
>

>
> }
>
> # dump Movie Poster
> sub getMoviePoster {
>    my ($movieid)=@_; # grab movieid parameter
>    if (defined $opt_d) { printf("# looking for movie id: '%s'\n",
> $movieid);}
>
>    # get the search results  page
>
>    my $request =
> "http://www.allocine.fr/film/fichefilm-".$movieid."/affiches/";
>    if (defined $opt_d) { printf("# request: '%s'\n", $request); }
>    my ($rc, $response) = myth_url_get($request);
>    my $mediafile = parseBetween($response,"<a
> href=\"/film/fichefilm-".$movieid."/affiches/detail/?cmediafile=","\"
> >");
>
>    $request =
> "http://www.allocine.fr/film/fichefilm-".$movieid."/affiches/detail/?cmediafile=".$mediafile;
>    ($rc, $response) = myth_url_get($request);
>    my $uri = parseBetween(parseBetween($response,"<div class=\"tac\"
> style=\"\">","</div>"),"<img src=\"","\" alt");
>    if ($uri eq "")
>    {
>         $request =
> "http://www.allocine.fr/film/fichefilm-".$movieid."/affiches/";
>         ($rc, $response) = myth_url_get($request);
>         my $tmp_uri = parseBetween($response, "<a
> href=\"/film/fichefilm-".$movieid."/affiches/\">"," alt=");
>         $tmp_uri =~ s/\n/ /gm;
>         $uri = trim(parseBetween($tmp_uri,"<img src='h","'"));
>         if($uri ne "")
>         {
>                 $uri = "h$uri";
>         }
>         print "$uri\n";
>    }
>
>    # if no picture was found, just download the empty poster
>    if($uri eq ""){
>         $uri =
> "http://images.allocine.fr/r_160_214/commons/emptymedia/AffichetteAllocine.gif";
>    }
>
>    print "$uri\n";
> }
>
> sub getMovieList {
>         my ($filename, $options) = @_; # grab parameters
>
>         my $query = cleanTitleQuery($filename);
>         if (!$options) { $options = ""; }
>         if (defined $opt_d) {
>                 printf("# query: '%s', options: '%s'\n", $query,
> $options);
>         }
>
>         # get the search results  page
>         my $request = "http://www.allocine.fr/recherche/1/?q=$query";
>         if (defined $opt_d) { printf("# request: '%s'\n", $request); }
>         my ($rc, $response) = myth_url_get($request);
>         from_to($response,'utf-8','iso-8859-1');
>         $response =~ s/\n//g;
>         # extract possible matches
>         #    possible matches are grouped in several categories:
>         #        exact, partial, and approximate
>         my $exact_matches = $response;
>         # parse movie list from matches
>         my $beg = "<div style=\"margin-top:-5px;\">";
>         my $end = "<span class=\"fs11\">";
>
>         my @movies;
>
>         my $data = $exact_matches;
>         if ($data eq "") {
>                 if (defined $opt_d) { printf("# no results\n"); }
>         } else {
>                 my $start = index($data, $beg);
>                 my $finish = index($data, $end, $start);
>
>                 my $title;
>                 my $movienum;
>                 my $moviename;
>                 while ($start != -1) {
>                         $start += length($beg);
>                         my $sub1 = substr($data, $start, $finish -
> $start);
>                         $sub1 =~ s/(.*)\(.*$/$1/;
>                         $moviename = trim(removeTag($sub1));
>                         $movienum = parseBetween($sub1,"<a
> href='/film/fichefilm_gen_cfilm=",".html");
>
>                         $title = removeTag($moviename);
>                         $moviename = removeTag($moviename);
>                         my ($movieyear)= $moviename =~/\((\d+)\)/;
>                         if ($movieyear) {
>                                 $title = $title." (".$movieyear.")";
>                         }
>                         $moviename=$title ;
>
>                         # advance data to next movie
>                         $data = substr($data, - (length($data) -
> $finish));
>                         $start = index($data, $beg);
>                         $finish = index($data, $end, $start);
>
>                         # add to array
>                         push(@movies, "$movienum:$moviename");
>                 }
>
>                 # display array of values
>                 for $movie (@movies) {
>                         print "$movie\n";
>                 }
>         }
> }
>
> #
> # Main Program
> #
>
> # parse command line arguments
>
>     GetOptions( "utf8" => \$opt_u_dummy,
>                 "version" => \$opt_v,
>                 "info" => \$opt_i,
>                 "language" => \$opt_l,
>                 "originaltitle" => \$opt_originaltitle,
>                 "casting" => \$opt_casting,
>                 "Data" => \$opt_D,
>                 "Movie" => \$opt_M,
>                 "Poster" => \$opt_P
>                 );
>

> # print out info
> if (defined $opt_v) { version(); exit 1; }
> if (defined $opt_i) { info(); exit 1; }
> if (defined $opt_l) {
>     my $lang = shift;
> }
>
> # print out usage if needed
> if (defined $opt_h || $#ARGV<0) { help(); }
>
> if (defined $opt_D) {
>    # take movieid from cmdline arg
>    $movieid = shift || die "Usage : $0 -D <movieid>\n";
>    getMovieData($movieid);
> }
>
> elsif (defined $opt_P) {
>    # take movieid from cmdline arg
>    $movieid = shift || die "Usage : $0 -P <movieid>\n";
>    getMoviePoster($movieid);
> }
>
> elsif (defined $opt_M) {
>    # take query from cmdline arg
>    #$options = shift || die "Usage : $0 -M <query>\n";
>    my $query;
>    my $options = '';
>    foreach $key (0 .. $#ARGV) {
>         $query .= $ARGV[$key]. ' ';
>    }
>    getMovieList($query, $options);
> }
> # vim: set expandtab ts=3 sw=3 :

New description:

 Allocine has released an API for the metadata (thanks to
 http://wiki.gromez.fr/dev/api/allocine):
 http://api.allocine.fr/xml/movie?code=$movieid&partner=3

 I have made some modifications based on the allocine.pl script from Xavier
 Hervy to be consistent with the allocine API and the mythtv 0.24 metadata
 schema

 I hope this helps

--

-- 
Ticket URL: <http://svn.mythtv.org/trac/ticket/9074#comment:2>
MythTV <http://www.mythtv.org/>
MythTV Media Center


More information about the mythtv-commits mailing list