<?php
        /*
        
         Bell ExpressVu PPV Scraper for MythTV v0.19 - 0.19.1 svn
         by Andrew Saunders (saunders@pagpaintball.com)
        
         ChangeLog:
                 April 29, 2006 - v0.2 - minor fixes plus updates for the minor changes to the guide.
                 April 24, 2006 - v0.1 - fully working version
        
         Installation:
                 step 1 - this requires the CLI version of PHP5 (may work with 4), must include mysqli and cURL support.
                 step 2 - select which channels you want from labs.zap2it.com or another source and run "mythfilldatabase".
                 step 3 - set config info (below) and select which PPV channels you wish to update from below.
                 step 4 - run from either a web browser or commandline (use browser if debugging).
                 step 5 - enjoy having ppv info in your guide.
                 
         ToDo:
                 - experiment with curl_multi to grab multiple pages at once.
                 
         === NOTE : THIS IS A BETA RELEASE, IT WORKS FOR ME, YMMV! ===
        
        */
        
        /* ------------------------------------------------------------------------
                CONFIG
        ------------------------------------------------------------------------ */
        
        $debugLevel = 4;                        // 0=no output, 1=minimal, 2=verbose, (3&4 debugging only!) 3=include runtime variables, 4=all runtime info
        $daysToScrape = 2;                        // how many days to scrape (read note above first)
        $timezone = 'AST';                        // options are : PST MST CST EST AST NEWF
        $mysqlName = 'root';                // mysql name
        $mysqlPass = '';                        // mysql password
        $mysqlDB = 'mythconverg';        // mythtv db name
        $mysqlAddr = 'localhost';        // db address
        $sourceID = 3;                                // sourceID to update (find this in mythweb > edit settings > channel info)
        $channelsToScrape = array();
        // uncomment for which channels you want scraped (for speed's sake please only select what you watch and if it's in season)
        // french PPV
        //array_push($channelsToScrape, 156, 157, 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175, 176, 177);
        // english PPV
        array_push($channelsToScrape, 351, 352, 353, 354, 355, 356, 357, 358, 359, 360, 361, 362, 363, 364, 365, 366, 367, 368, 369, 370, 371, 372, 373, 374, 375, 376, 377, 378, 379, 380, 381);
        // soccer
        //array_push($channelsToScrape, 403);
        // nhl
        //array_push($channelsToScrape, 425, 426, 427, 428, 429, 430, 431, 432, 433, 434, 435, 436, 437, 438);
        // nascar
        //array_push($channelsToScrape, 440, 441, 442, 443, 444, 445, 446);
        // nfl
        //array_push($channelsToScrape, 451, 452, 453, 454, 455, 456, 457, 458, 459, 460, 461, 462, 463, 464, 465, 466, 467, 468);
        // hpi tv
        //array_push($channelsToScrape, 475, 476, 477);
        // cricket
        //array_push($channelsToScrape, 703);
        // poland
        //array_push($channelsToScrape, 711);
        // kids PPV
        array_push($channelsToScrape, 560, 561);
        // venus PPV
        //array_push($channelsToScrape, 749, 750, 751, 752, 753, 754, 755, 756, 757, 758, 759, 760);
        // HD PPV
        //array_push($channelsToScrape, 830, 831, 832, 833);
        /* ------------------------------------------------------------------------
                DO NOT EDIT BELOW THIS LINE UNLESS YOU KNOW WHAT YOU'RE DOING
        ------------------------------------------------------------------------ */
        set_time_limit(60 * 60);        // 1hr max runtime for script to finish
        // setup mysql
        $dbi = new mysqli($mysqlAddr, $mysqlName, $mysqlPass, $mysqlDB) or die('Could not connect: ' . mysql_error());
        $stmt = $dbi->stmt_init();
        // setup cURL
        $ch = curl_init();
        curl_setopt($ch, CURL_HTTP_VERSION_1_1, true);
        curl_setopt($ch, CURLOPT_TIMEOUT, 30);
        curl_setopt($ch, CURLOPT_FAILONERROR, true);
        curl_setopt($ch, CURLOPT_RETURNTRANSFER, true);
        curl_setopt($ch, CURLOPT_FOLLOWLOCATION, true);
        curl_setopt($ch, CURLOPT_ENCODING, 'gzip,deflate');
        curl_setopt($ch, CURLOPT_USERAGENT, 'User-Agent: Mozilla/5.0 (Windows; U; Windows NT 5.1; en-US; rv:1.8.0.2) Gecko/20060308 Firefox/1.5.0.2');
        // uncomment below to generate a new cookie every time
        curl_setopt($ch, CURLOPT_COOKIEJAR, '-');
        // or uncomment below to save cookies between instances
        //$cookieFile = '/dir/to/xvu_cookie.txt';
        //curl_setopt($ch, CURLOPT_COOKIEJAR, $cookieFile);
        //curl_setopt($ch, CURLOPT_COOKIEFILE, $cookieFile);
        // uncomment below for extra debugging info
        //curl_setopt($ch, CURLOPT_VERBOSE, true);
        //curl_setopt($ch, CURLOPT_HEADER, true);
        $curlMaxRetries = 10;                        // maximum number of times it will retry to get an existing page
        $curlMaxAbort = 20;                                // maximum number of retries before failing out
        $commandLine = isset($argv);        // true/false if running from the commandline
        /* ------------------------------------------------------------------------
                RETRIEVE ALL THE PPV IDS
        ------------------------------------------------------------------------ */
        $currentDay = 0;
        $currentHour = 1;
        $ppvIDArray = array();
        // get initial cookie
        $html = false;
        $retries = 0;
        while (($html === false) && (++$retries <= $curlMaxRetries)) {
                checkRetries();
                // site returned nothing. hit it again
                printDebugInfo('retreiving initial page for cookie :: attempt ' . $retries . ' / ' . $curlMaxRetries, 1);
                curl_setopt($ch, CURLOPT_URL, 'http://www.bell.ca/ExpressVuEPG/loadVuGuide.do?lang=en');
                $html = curl_exec($ch);
                printDebugInfo($html, 4);
                if (preg_match("/Sorry, due to technical difficulties this function is not available/", $html)) {
                        //Sorry, due to technical difficulties this function is not available at this time. Please try again later.
                        printDebugInfo('guide is down, try again later...', 1);
                        //exit();
                        $html = false;
                }
        }
        if ($html === false) {
                printDebugInfo('unable to retrieve initial page, aborting...', 1);
                exit();
        }
                
                
        // caculate the starting/ending channel
        $startingChannel = 1000;
        $endChannel = 0;
        foreach($channelsToScrape as $junk => $channel) {
                if ($channel < $startingChannel) {
                        $startingChannel = $channel;
                }
                if ($channel > $endChannel) {
                        $endChannel = $channel;
                }
        }
        while ($currentDay < $daysToScrape) {
                $pageDown = false;
                do {
                        $html = false;
                        $retries = 0;
                        while (($html === false) && (++$retries <= $curlMaxRetries)) {
                                if (!$pageDown) {
                                        printDebugInfo('retrieving list of PPVs :: ' .
                                                'day ' . ($currentDay + 1) . ' / ' . $daysToScrape .
                                                ', hour ' . date('H:i', mktime((($currentHour+1)/2),((($currentHour-1)%2)*30),0,1,1,2000)) .
                                                ' - ' . date('H:i', mktime((($currentHour+1+4)/2),((($currentHour-1)%2)*30),0,1,1,2000)) .
                                                ', attempt ' . $retries . ' / ' . $curlMaxRetries, 1);
                                } else {
                                        printDebugInfo('retrieving next page of channels :: attempt '. $retries . ' / ' . $curlMaxRetries, 2);
                                }
                                checkRetries();
                                if (!$pageDown) {
                                        // send POST, follow redirect (automatically) and get page
                                        // selectedStartTime :: 1=1am, 2=1:30am, 3=2:00am, ..., 46=11:30pm, 47=12:00am, 48=12:30am
                                        // selectedDay :: 0=current day, 1=next day, 2=two days later, ..., 13=...
                                        // gotoChannel :: starting channel
                                        curl_setopt($ch, CURLOPT_URL, 'http://www.bell.ca/ExpressVuEPG/submitSearchFilter.do');
                                        curl_setopt($ch, CURLOPT_REFERER, 'http://www.bell.ca/ExpressVuEPG/loadVuGuide.do?lang=en');
                                        curl_setopt($ch, CURLOPT_POST, true);
                                        curl_setopt($ch, CURLOPT_POSTFIELDS, 'favID=&favName1=&favName2=&favName3=&favName4=&favKeyword1=&favKeyword2=&favKeyword3=&favKeyword4=&favNetwork1=&favNetwork2=&favNetwork3=&favNetwork4=&favTheme1=&favTheme2=&favTheme3=&favTheme4=&selectedTheme=1%2C0%2C0%2C0%2C0%2C0%2C0%2C0&progID=&progTZ=&selectedKeyword=&selectedNetwork=&cbAll=on&orderbyName=&orderbyNo=asc&searchWin=1&userTimeZone=0&selectedFavName=&selectedDay=' . $currentDay . '&selectedStartTime=' . $currentHour . '&selectedTimeZone=' . $timezone . '&gotoChannel=' . $startingChannel . '&buttonPressed.x=16&buttonPressed.y=8');
                                        $html = curl_exec($ch);
                                        printDebugInfo($html, 4);
                                } else {
                                        // send command to move down a page
                                        curl_setopt($ch, CURLOPT_URL, 'http://www.bell.ca/ExpressVuEPG/submitChangeView.do?buttonPressed=DOWN');
                                        curl_setopt($ch, CURLOPT_REFERER, 'http://www.bell.ca/ExpressVuEPG/submitSearchFilter.do');
                                        curl_setopt($ch, CURLOPT_POST, false);
                                        $html = curl_exec($ch);
                                        printDebugInfo($html, 4);
                                }
                                if (preg_match("/Sorry, due to technical difficulties this function is not available/", $html)) {
                                        printDebugInfo('guide is down, trying again', 2);
                                        $html = false;
                                }
                        }
                        $html = preg_replace("/([\r\n]| )/m", "", $html);        // remove newlines and junk
                        $html = preg_replace("/^.*?start of dynamic rows(.*?)end of dynamic rows.*$/", "$1", $html);        // trim
                        $html = preg_replace("/[ \t]*([<>])[ \t]*/", "$1", $html);        // tighten
                        $htmlArray = preg_split("/<\/tr>/", $html);        // now split up this way via </tr>
                        foreach($htmlArray as $htmlPart) {
                                if (preg_match("/<td bgcolor=\"#E5F2F8\" width=\"4%\" align=\"center\">\d+<\/td>/", $htmlPart)) {
                                        $channel = preg_replace("/^.*?<td bgcolor=\"#E5F2F8\" width=\"4%\" align=\"center\">(\d+)<\/td>.*$/", "$1", $htmlPart);
                                        if ($channel > $endChannel) {
                                                // no need to go any further
                                                //continue 2;
                                        }
                                        printDebugInfo('channel : ' . $channel, 3);
                                        if (in_array($channel, $channelsToScrape)) {
                                                // if this is a channel we want then rip out ppv links
                                                unset($matches);
                                                preg_match_all("/javascript:popupwin(?:PPV)?\(\'(\d+)\',/m", $htmlPart, $matches);
                                                printDebugInfo('ppv ids from page ($matches)', 3);
                                                printDebugInfo($matches, 3);
                                                foreach($matches[1] as $match) {
                                                        // this will automatically force uniqueness and set the movie to the first day found
                                                        if (!isset($ppvIDArray[$match])) {
                                                                // annoyingly I have to set the day manually as it isn't _anywhere_ on the info page...
                                                                if ($currentHour >= 47) {
                                                                        // if it's past midnight it's the next day
                                                                        $ppvIDArray[$match] = array('day' => ($currentDay + 1));
                                                                } else {
                                                                        $ppvIDArray[$match] = array('day' => $currentDay);
                                                                }
                                                        }
                                                }
                                        }
                                }
                        }
                        $pageDown = true;        // start heading down through the pages
                } while (preg_match("/javascript:popupwin(?:PPV)?\(\'(\d+)\',/m", $html));        // while PPVs still listed
                // adjust time
                $currentHour += 4;        // add 2 hours (2 hours listed per page)
                if ($currentHour > 48) {
                        // past the maximum 48 so roll to the next day
                        $currentHour = $currentHour % 48;
                        $currentDay++;
                }
        }
        printDebugInfo('all ppv ids found ($ppvIDArray)', 3);
        printDebugInfo($ppvIDArray, 3);
        printDebugInfo('finished retrieving PPV list, ' . count($ppvIDArray) . ' PPVs found', 1);
        
        /* ------------------------------------------------------------------------
                RETRIEVE ALL INDIVIDUAL PPV INFORMATION
        ------------------------------------------------------------------------ */
        
        foreach($ppvIDArray as $ppvID => $ppvInfo) {
                printDebugInfo('retrieving PPV ' . $ppvID, 2);
                curl_setopt($ch, CURLOPT_POST, false);
                curl_setopt($ch, CURLOPT_REFERER, 'http://www.bell.ca/ExpressVuEPG/submitSearchFilter.do');
                curl_setopt($ch, CURLOPT_URL, 'http://www.bell.ca/ExpressVuEPG/vuDetails.do?code=' . $ppvID . '&tzcode=' . $timezone);
                $html = false;
                $retries = 0;
                while (($html === false) && (++$retries <= $curlMaxRetries)) {
                        checkRetries();
                        $html = curl_exec($ch);
                        printDebugInfo($html, 4);
                }
                $html = preg_replace("/([\r\n]| )/m", "", $html);        // remove newlines and junk
                $html = preg_replace("/^.*?<Body(.*?)Your Bell ExpressVu PIN.*$/", "$1", $html);        // trim
                $html = preg_replace("/[ \t]*([<>])[ \t]*/", "$1", $html);        // tighten
                // rip out info
                $title = preg_replace("/^.*?<td valign=\"top\" align=\"left\"><div class=\"bigblueBoldText\">(.*?)<\/div>.*$/m", "$1", $html);
                $channel = preg_replace("/^.*?>Channel: .*? - (\d+).*$/m", "$1", $html);
                $startTime = preg_replace("/^.*?>Start Time: (\d?\d:\d\d .M) .*$/m", "$1", $html);
                $endTime = preg_replace("/^.*?>End Time: (\d?\d:\d\d .M) .*$/m", "$1", $html);
                $description = $rating = $cost = '';
                if (preg_match("/>Description of the show:</", $html)) {
                        $description = preg_replace("/^.*?>Description of the show:<\/div><div class=\"blueText\">(.*?)<\/div>.*$/m", "$1", $html);
                        $description = preg_replace("/^\(\d{2}:\d{2}[ap]m[^\)]*?\)(.*)$/m", "$1", $description);        // remove useless date from description
                }
                if (preg_match("/>Rating:</", $html)) {
                        $rating = preg_replace("/^.*?>Rating:<\/div><div class=\"blueText\">([^<]*?)<\/div>.*$/m", "$1", $html);
                        $rating = preg_replace("/([ ]+,)/", "", $rating);        // clean up junk
                }
                if (preg_match("/>[\$](\d+\.\d\d)</", $html)) {
                        $cost = preg_replace("/^.*?<div class=\"blueText\">[\$](\d+\.\d\d)<\/div>.*$/m", "$1", $html);
                }
                if (in_array($channel, $channelsToScrape)) {
                        // if we want this channel then insert into array
                        $ppvIDArray[$ppvID] = array (
                                'day' => $ppvInfo['day'],
                                'title' => html_entity_decode($title),
                                'channel' => $channel,
                                'starttime' => $startTime,
                                'endtime' => $endTime,
                                'description' => html_entity_decode($description),
                                'rating' => html_entity_decode($rating),
                                'cost' => $cost        );
                } else {
                        unset($ppvIDArray[$ppvID]);
                }
                printDebugInfo("$title :: $channel :: $startTime :: $endTime :: $description :: $rating :: $cost", 3);
        }
        printDebugInfo('all info to be inserted into myth ($ppvIDArray)', 3);
        printDebugInfo($ppvIDArray, 3);
        
        /* ------------------------------------------------------------------------
                SAVE PPV INFO TO MYTHTV DATABASE
        ------------------------------------------------------------------------ */
        
        printDebugInfo('inserting ppv info into myth', 1);
        
        // get chanid for each individual channel
        printDebugInfo('retrieving channel info from myth db', 3);
        $channelsInMyth = array();
        foreach($channelsToScrape as $junk => $channel) {
                $sql = 'SELECT chanid, channum FROM channel WHERE sourceid = ? AND channum = ?';
                if ($stmt->prepare($sql)) {
         $stmt->bind_param('ii', $sourceID, $channel);
         $stmt->execute();
         $stmt->store_result(); // buffer everything
         }
         if ($stmt->errno) {
                        die($stmt->error);
                } else {
                        $stmt->bind_result($chanid, $channum);
                        $stmt->fetch();
                        if ($channum) {
                                $channelsInMyth[$channum] = $chanid;
                        }
                }
                $stmt->free_result();
        }
        
        // delete all channel lineups
        printDebugInfo('deleting all previous info from ppv channels', 3);
        $sqls = array(
                'DELETE FROM program WHERE chanid = ?',
                'DELETE FROM programgenres WHERE chanid = ?',
                'DELETE FROM programrating WHERE chanid = ?');
        foreach($channelsInMyth as $channum => $chanid) {
                foreach ($sqls as $sql) {
                        if ($stmt->prepare($sql)) {
                                $stmt->bind_param('i', $chanid);
                         $stmt->execute();
                        }
                        if ($stmt->errno) {
                                die($stmt->error);
                        }
                        $stmt->free_result();
                }
        }
        
        printDebugInfo('inserting individual ppv info', 3);
        // insert into myth
        foreach($ppvIDArray as $ppvID => $ppvInfo) {
                if (isset($channelsInMyth[$ppvInfo['channel']])) {
                        $chanid = $channelsInMyth[$ppvInfo['channel']];
                        $currentDate = date('Y-m-d', mktime(0, 0, 0, date("m"), date("d")+$ppvInfo['day'], date("Y")));
                        $starttime = date('Y-m-d G:i:s', (strtotime($currentDate . ' ' . $ppvInfo['starttime'])));
                        if ((strtotime($currentDate . ' ' . $ppvInfo['starttime'])) > (strtotime($currentDate . ' ' . $ppvInfo['endtime']))) {
                                // the endtime lands on the next day
                                $currentDate = date('Y-m-d', mktime(0, 0, 0, date("m"), date("d")+$ppvInfo['day']+1, date("Y")));
                        }
                        $endtime = date('Y-m-d G:i:s', (strtotime($currentDate . ' ' . $ppvInfo['endtime'])));
                        $genre = 'PPV';                // acceptable?
                        $closecaptioned = 0;
                        $stars = 0;
                        $stereo = 1;
                        $title = $ppvInfo['title'];
                        $description = $ppvInfo['description'];
                        if ($ppvInfo['rating']) {
                                $description .= "\r\n" . 'Rating: ' . $ppvInfo['rating'];
                        }
                        if ($ppvInfo['cost']) {
                                $description .= "\r\n" . 'Cost: $' . $ppvInfo['cost'];
                        }
                        $channum = $ppvInfo['channel'];
                        // try and detect some ratings
                        if (preg_match("/\(G\)/", $ppvInfo['rating'])) {
                                $rating = 'G';
                        } elseif (preg_match("/\(PG\)/", $ppvInfo['rating'])) {
                                $rating = 'PG';
                        } elseif (preg_match("/\(R\)/", $ppvInfo['rating'])) {
                                $rating = 'R';
                        } else {
                                $rating = 'NR';
                        }
                        // insert into sql
                        $sql = 'INSERT INTO program (starttime, endtime, chanid, category, closecaptioned, stars, stereo, title, description) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)';
                        if ($stmt->prepare($sql)) {
                                $stmt->bind_param('ssisidiss', $starttime, $endtime, $chanid, $genre, $closecaptioned, $stars, $stereo, $title, $description);
                         $stmt->execute();
                        }
                        if ($stmt->errno) {
                                //die($stmt->error);
                                printDebugInfo('mysql : ' . $stmt->error, 1);
                        }
                        $stmt->free_result();
                        $sql = 'INSERT INTO programgenres (starttime, chanid, genre) VALUES (?, ?, ?)';
                        if ($stmt->prepare($sql)) {
                                $stmt->bind_param('sis', $starttime, $chanid, $genre);
                         $stmt->execute();
                        }
                        if ($stmt->errno) {
                                //die($stmt->error);
                                printDebugInfo('mysql : ' . $stmt->error, 1);
                        }
                        $stmt->free_result();
                        $sql = 'INSERT INTO programrating (starttime, chanid, rating) VALUES (?, ?, ?)';
                        if ($stmt->prepare($sql)) {
                                $stmt->bind_param('sis', $starttime, $chanid, $rating);
                         $stmt->execute();
                        }
                        if ($stmt->errno) {
                                //die($stmt->error);
                                printDebugInfo('mysql : ' . $stmt->error, 1);
                        }
                        $stmt->free_result();
                }
        }
        
        printDebugInfo('done...', 1);
        curl_close($ch);
        exit();
        // done
        
        /* ------------------------------------------------------------------------
                FUNCTIONS
        ------------------------------------------------------------------------ */
        /**
         * output to either web browser or console
         *
         * @param string/array $text
         */
        function printDebugInfo($text, $textDebugLevel) {
                global $debugLevel, $commandLine;
                
                if ($textDebugLevel <= $debugLevel) {
                        if ($commandLine) {
                                // running from command line
                                if (is_array($text)) {
                                        print_r($text) . "\n";
                                } else {
                                        echo $text . "\n";
                                }
                        } else {
                                // running from a browser
                                if ($textDebugLevel == 3) {
                                        echo '<div style="margin: 2px 0; padding-left:10px; background-color:#EEEEEE; border:1px solid #0000FF;">';
                                } elseif ($textDebugLevel == 4) {
                                        echo '<div style="margin: 2px 0; padding-left:10px; background-color:#EEEEEE; border:1px solid #FF0000;">';
                                }
                                if (is_array($text)) {
                                        echo '<pre>';
                                        print_r($text);
                                        echo '</pre><br />';
                                } else {
                                        echo nl2br(htmlentities($text)) . '<br />';
                                }
                                if ($textDebugLevel >= 3) {
                                        echo '</div>';
                                }
                                flush();        // send to browser immediately
                        }
                }
        }
        
        /**
         * keep track of how many "retrys", a function just to keep code clean
         *
         */
        
        function checkRetries() {
                global $retries, $curlMaxAbort;
                
                if ($retries > 1) {
                        if ($curlMaxAbort-- == 0) {
                                printDebugInfo('too many pages failed, aborting...', 1);
                                exit();
                        }
                        // a page failed so wait 3 seconds
                        printDebugInfo('page retrieval failed, waiting 3 seconds before retrying', 3);
                        //sleep(3);
                }
        }
        
?>