[mythtv] [PATCH] More accurate/ranked IMDB searches
Tim Harvey
tharvey at alumni.calpoly.edu
Thu Oct 16 00:54:34 EDT 2003
This is my first patch, so please let me know if I'm not doing something
correctly or if you have comments in general.
Two patches:
Libmyth/httpComms: extends capabilities to handle HTTP redirects. Also
adds a static function to synchronously get the HTML data for a URL
handling timeouts/retries/redirects based on function parameters.
Mythvideo/videomanager: cleans up html grabber code by using the new
synchronous function in libmyth. Handles redirects. Sorts the movie
listings according to level of matching (ranking) against your filename.
Handles timeouts/retries in some cases where it didn't before. Allows
doing an 'exact match' search if the file is selected with the spacebar
(selecting with 'enter' still uses the previous 'fuzzy' search if you
like it better). Filters out TV series (is this a problem for anyone?)
and video games from search results.
I hope this can be of some value; it works really well for me, as I have
quite a few movies in my collection and was finding it quite tedious to
browse through the selection list that each IMDB search produced. With
this patch about 80% of my movies now skip the movie list selection
completely, as the exact search resolves to only 1 result.
Tim
-------------- next part --------------
Index: mythtv/libs/libmyth/httpcomms.cpp
===================================================================
RCS file: /var/lib/mythcvs/mythtv/libs/libmyth/httpcomms.cpp,v
retrieving revision 1.3
diff -u -r1.3 httpcomms.cpp
--- mythtv/libs/libmyth/httpcomms.cpp 24 Sep 2003 23:40:00 -0000 1.3
+++ mythtv/libs/libmyth/httpcomms.cpp 16 Oct 2003 06:30:43 -0000
@@ -1,4 +1,6 @@
#include <iostream>
+#include <qapplication.h>
+#include <unistd.h>
using namespace std;
#include "httpcomms.h"
@@ -9,6 +11,16 @@
init(url);
}
+HttpComms::HttpComms(QUrl &url, int timeoutms)
+ : http(0)
+{
+ init(url);
+ m_timer = new QTimer();
+ m_timer->start(timeoutms, TRUE);
+ connect(m_timer, SIGNAL(timeout()), SLOT(timeout()));
+}
+
+
#ifndef ANCIENT_QT
HttpComms::HttpComms(QUrl &url, QHttpRequestHeader &header)
{
@@ -18,6 +30,10 @@
HttpComms::~HttpComms()
{
+ if (m_timer)
+ {
+ delete m_timer;
+ }
delete http;
}
@@ -48,11 +64,19 @@
http->setHost(url.host(), port);
+ m_debug = 0;
+ m_redirectedURL = "";
m_done = false;
m_data = "";
+ m_statusCode = 0;
+ m_responseReason = "";
+ m_timer = NULL;
+ m_timeout = false;
connect(http, SIGNAL(done(bool)), this, SLOT(done(bool)));
connect(http, SIGNAL(stateChanged(int)), this, SLOT(stateChanged(int)));
+ connect(http, SIGNAL(responseHeaderReceived(const QHttpResponseHeader&)),
+ this, SLOT(headerReceived(const QHttpResponseHeader&)));
http->request(header);
}
@@ -64,6 +88,10 @@
disconnect(http, 0, 0, 0);
http->abort();
#endif
+ if (m_timer)
+ {
+ m_timer->stop();
+ }
}
void HttpComms::done(bool error)
@@ -74,15 +102,133 @@
cout << "MythVideo: NetworkOperation Error on Finish: "
<< http->errorString() << ".\n";
}
- else
+ else if (http->bytesAvailable())
m_data = QString(http->readAll());
+ if (m_debug > 1) {
+ cout << "done: " << m_data.length() << " bytes" << endl;
+ }
m_done = true;
+ if (m_timer)
+ {
+ m_timer->stop();
+ }
#endif
}
void HttpComms::stateChanged(int state)
{
- (void)state;
+ if (m_debug > 1) {
+ (void)state;
+ switch (state) {
+ case QHttp::Unconnected: cout << "unconnected" << endl; break;
+ case QHttp::HostLookup: break;
+ case QHttp::Connecting: cout << "connecting" << endl; break;
+ case QHttp::Sending: cout << "sending" << endl; break;
+ case QHttp::Reading: cout << "reading" << endl; break;
+ case QHttp::Connected: cout << "connected" << endl; break;
+ case QHttp::Closing: cout << "closing" << endl;
+ break;
+ }
+ }
+}
+
+#ifndef ANCIENT_QT
+void HttpComms::headerReceived(const QHttpResponseHeader &resp)
+{
+ m_statusCode = resp.statusCode();
+ m_responseReason = resp.reasonPhrase();
+ if (m_debug > 1) {
+ cout << "Got HTTP response: " << m_statusCode << ":"
+ << m_responseReason << endl;
+ cout << "Keys: " << resp.keys().join(",") << endl;
+ }
+ if (resp.statusCode() >= 300 && resp.statusCode() <= 400) {
+ // redirection
+ QString uri = resp.value("LOCATION");
+ if (m_debug > 0) {
+ cout << "Redirection to: " << uri << endl;
+ }
+ m_redirectedURL = resp.value("LOCATION");
+ }
+}
+#endif
+
+void HttpComms::timeout()
+{
+ m_timeout = true;
+ m_done = true;
+}
+
+
+// getHttp - static function for grabbing http data for a url
+// this is a synchronous function, it will block according to the vars
+QString HttpComms::getHttp(QString& url, int timeoutMS, int maxRetries, int maxRedirects)
+{
+ int redirectCount = 0;
+ int timeoutCount = 0;
+ QString res = "";
+ HttpComms *httpGrabber = NULL;
+ int m_debug = 0;
+
+ while (1) {
+ QUrl qurl(url);
+ if (httpGrabber != NULL)
+ delete httpGrabber;
+ httpGrabber = new HttpComms(qurl, timeoutMS);
+
+ while (!httpGrabber->isDone())
+ {
+ qApp->processEvents();
+ usleep(10000);
+ }
+
+ // Handle timeout
+ if (httpGrabber->isTimedout())
+ {
+ if (m_debug > 0) {
+ cout << "timeout for url:" << url.latin1() << endl;
+ }
+ //Increment the counter and check were not over the limit
+ if (timeoutCount++ >= maxRetries)
+ {
+ cerr << "Failed to contact server for url: " << url.latin1()
+ << endl;
+ break;
+ }
+ // Try again
+ if (m_debug > 0) {
+ cout << "attempt # " << (timeoutCount+1) << "/" << maxRetries
+ << " for url:" << url.latin1() << endl;
+ }
+ continue;
+ }
+
+ // Check for redirection
+ if (!httpGrabber->getRedirectedURL().isEmpty()) {
+ if (redirectCount++ < maxRedirects)
+ {
+ url = httpGrabber->getRedirectedURL();
+ if (m_debug > 0) {
+ cout << "redirect " << redirectCount << "/" << maxRedirects
+ << " to url:" << url.latin1() << endl;
+ }
+ }
+ // Try again
+ timeoutCount = 0;
+ continue;
+ }
+
+ res = httpGrabber->getData();
+ break;
+ }
+
+ delete httpGrabber;
+ if (m_debug > 1) {
+ cout << "Got " << res.length() << " bytes from url: '"
+ << url.latin1() << "'" << endl;
+ }
+ return res;
}
Index: mythtv/libs/libmyth/httpcomms.h
===================================================================
RCS file: /var/lib/mythcvs/mythtv/libs/libmyth/httpcomms.h,v
retrieving revision 1.2
diff -u -r1.2 httpcomms.h
--- mythtv/libs/libmyth/httpcomms.h 2 Sep 2003 16:30:58 -0000 1.2
+++ mythtv/libs/libmyth/httpcomms.h 16 Oct 2003 06:30:43 -0000
@@ -5,6 +5,7 @@
#include <qfile.h>
#include <qurl.h>
#include <qobject.h>
+#include <qtimer.h>
#if (QT_VERSION < 0x030100)
#define ANCIENT_QT
@@ -16,6 +17,7 @@
Q_OBJECT
public:
HttpComms(QUrl &url);
+ HttpComms(QUrl &url, int timeoutms);
#ifndef ANCIENT_QT
HttpComms(QUrl &url, QHttpRequestHeader &header);
#endif
@@ -26,24 +28,42 @@
#else
bool isDone(void) { return true; }
#endif
+
+ int getStatusCode(void) { return m_statusCode; }
+ QString getResponseReason(void) { return m_responseReason; }
QString getData(void) { return m_data; }
+ QString getRedirectedURL(void) { return m_redirectedURL; }
void stop();
+ bool isTimedout(void) { return m_timeout; }
+
+ static QString getHttp(QString& url, int timeoutMS = 10000,
+ int maxRetries = 3, int maxRedirects = 3);
protected:
void init(QUrl &url);
#ifndef ANCIENT_QT
void init(QUrl &url, QHttpRequestHeader &header);
#endif
-
+
private slots:
+ void timeout();
void done(bool error);
void stateChanged ( int state );
-
+#ifndef ANCIENT_QT
+ void headerReceived(const QHttpResponseHeader &resp);
+#endif
+
private:
+ int m_statusCode;
+ QString m_redirectedURL;
+ QString m_responseReason;
QHttp *http;
bool m_done;
QString m_data;
+ QTimer* m_timer;
+ bool m_timeout;
+ int m_debug;
};
#endif
-------------- next part --------------
Index: mythvideo/mythvideo/videomanager.cpp
===================================================================
RCS file: /var/lib/mythcvs/mythvideo/mythvideo/videomanager.cpp,v
retrieving revision 1.20
diff -u -r1.20 videomanager.cpp
--- mythvideo/mythvideo/videomanager.cpp 24 Sep 2003 03:09:58 -0000 1.20
+++ mythvideo/mythvideo/videomanager.cpp 16 Oct 2003 06:37:47 -0000
@@ -20,12 +20,21 @@
#include <mythtv/mythcontext.h>
#include <mythtv/mythdialogs.h>
+#define DEBUG_LEVEL 0
+#define ALLOW_EXACT_SEARCH
+#define IMDB_SEARCH_OPTIONS "&from_year=1890&to_year=2010&sort=smart&tv=off&x=12&y=14"
+
VideoManager::VideoManager(QSqlDatabase *ldb,
MythMainWindow *parent, const char *name)
: MythDialog(parent, name)
{
db = ldb;
updateML = false;
+ use_fuzzy_search = true;
+ ignore_tv_series = true;
+ ignore_tv_videos = true;
+ debug = DEBUG_LEVEL;
+ isbusy = false; // ignores keys when true (set when doing http request)
RefreshMovieList();
@@ -49,14 +58,7 @@
listCountMovie = 0;
dataCountMovie = 0;
- GetMovieListingTimeoutCounter = 0;
- stopProcessing = false;
-
m_state = SHOWING_MAINWINDOW;
- httpGrabber = NULL;
-
- urlTimer = new QTimer(this);
- connect(urlTimer, SIGNAL(timeout()), SLOT(GetMovieListingTimeOut()));
theme = new XMLParse();
theme->SetWMult(wmult);
@@ -100,13 +102,6 @@
VideoManager::~VideoManager(void)
{
- if (httpGrabber)
- {
- httpGrabber->stop();
- delete httpGrabber;
- }
- delete urlTimer;
-
delete theme;
delete bgTransBackup;
@@ -116,13 +111,23 @@
void VideoManager::keyPressEvent(QKeyEvent *e)
{
+ if (isbusy)
+ return;
+
if (allowselect)
{
switch (e->key())
{
case Key_Space:
+#ifdef ALLOW_EXACT_SEARCH
+ use_fuzzy_search = false;
+ selected();
+ return;
+ break;
+#endif
case Key_Enter:
case Key_Return:
+ use_fuzzy_search = true;
selected();
return;
break;
@@ -261,10 +266,9 @@
QString VideoManager::parseData(QString data, QString beg, QString end)
{
- bool debug = false;
QString ret;
- if (debug == true)
+ if (debug > 2)
{
cout << "MythVideo: Parse HTML : Looking for: " << beg << ", ending with: " << end << endl;
}
@@ -276,13 +280,13 @@
replaceNumCharRefs(ret);
- if (debug == true)
+ if (debug > 2)
cout << "MythVideo: Parse HTML : Returning : " << ret << endl;
return ret;
}
else
{
- if (debug == true)
+ if (debug > 2)
cout << "MythVideo: Parse HTML : Parse Failed...returning <NULL>\n";
ret = "<NULL>";
return ret;
@@ -291,10 +295,9 @@
QString VideoManager::parseDataAnchorEnd(QString data, QString beg, QString end)
{
- bool debug = false;
QString ret;
- if (debug == true)
+ if (debug > 2)
{
cout << "MythVideo: Parse (Anchor End) HTML : Looking for: " << beg << ", ending with: " << end << endl;
}
@@ -307,13 +310,13 @@
replaceNumCharRefs(ret);
- if (debug == true)
+ if (debug > 2)
cout << "MythVideo: Parse HTML : Returning : " << ret << endl;
return ret;
}
else
{
- if (debug == true)
+ if (debug > 2)
cout << "MythVideo: Parse HTML : Parse Failed...returning <NULL>\n";
ret = "<NULL>";
return ret;
@@ -350,7 +353,68 @@
movieTitle = ret.right(ret.length() - ret.find("\">") - 2);
- listing[movieNumber] = movieTitle;
+ // filter
+ bool filterout = false;
+ if (ignore_tv_series && movieTitle.endsWith("(TV)") )
+ filterout = true;
+ if (ignore_tv_videos && movieTitle.endsWith("(V)") )
+ filterout = true;
+ if (movieTitle.endsWith("(VG)") )
+ filterout = true;
+
+ if (!filterout) {
+ QStringList titleWords = QStringList::split("+", theMovieName.upper());
+ QStringList hitWords = QStringList::split(QRegExp("\\s"), movieTitle.upper());
+
+ int hits = 0;
+ for (uint i = 0; i < titleWords.count(); i++) {
+ for (uint j = 0; j < hitWords.count(); j++) {
+ if (hitWords[j] == titleWords[i])
+ hits++;
+ }
+ }
+
+ // hitratio will be 0-100 (NOTE: imdb puts '(year)' on end of title
+ int total = titleWords.count() * hitWords.count();
+ int ratio = (total>0)?100*hits/total:0;
+ // ranking is combination of hitratio and order returned by imdb
+ movieTitle.sprintf("%03d %s", ratio + (100-count), movieTitle.latin1());
+ if (debug > 0) {
+ cout << "Adding '" << movieTitle << "' - " << movieNumber << endl;
+ }
+ if (listing[movieNumber] != "") {
+ // for duplicate, bump the current ranking
+ QString current = listing[movieNumber];
+ bool isNum = FALSE;
+ int rank = movieTitle.left(4).toInt(&isNum);
+ if (isNum) {
+ current.sprintf("%03d %s", rank + 10, current.mid(4).latin1());
+ listing[movieNumber] = current;
+ }
+
+ if (debug > 0) {
+ cout << "duplicate found, adjusting ranking: '" << current
+ << "'" << endl;
+ }
+ }
+ if (listing[movieNumber] != "") {
+ // for duplicate, bump the current ranking
+ QString current = listing[movieNumber];
+ bool isNum = FALSE;
+ int rank = movieTitle.left(4).toInt(&isNum);
+ if (isNum) {
+ current.sprintf("%03d %s", rank + 10, current.mid(4).latin1());
+ listing[movieNumber] = current;
+ }
+
+ if (debug > 0) {
+ cout << "duplicate found, adjusting ranking: '" << current
+ << "'" << endl;
+ }
+ } else
+
+ listing[movieNumber] = movieTitle;
+ }
data = data.right(data.length() - endint);
start = data.find(beg, 0, false) + beg.length();
@@ -407,28 +471,15 @@
QString host = "www.imdb.com";
QString path = "";
- QUrl url("http://" + host + "/title/tt" + movieNum + "/posters");
-
- //cout << "Grabbing Poster HTML From: " << url.toString() << endl;
+ QString url = "http://" + host + "/title/tt" + movieNum + "/posters";
+ isbusy = true;
+ QString res = HttpComms::getHttp(url);
+ isbusy = false;
- if (httpGrabber)
- {
- httpGrabber->stop();
- delete httpGrabber;
- }
-
- httpGrabber = new HttpComms(url);
-
- while (!httpGrabber->isDone())
- {
- qApp->processEvents();
- usleep(10000);
+ if (debug > 1) {
+ cout << "Got " << res.length() << " byte result: " << res.latin1() << endl;
}
- QString res;
- res = httpGrabber->getData();
-
-
QString beg, end, filename = "<NULL>";
// Check for posters on impawards.com first, since their posters
@@ -441,28 +492,9 @@
{
//cout << "Retreiving poster from " << impsite << endl;
-
- QUrl impurl(impsite);
-
- //cout << "Grabbing Poster HTML From: " << url.toString() << endl;
-
- if (httpGrabber)
- {
- httpGrabber->stop();
- delete httpGrabber;
- }
-
- httpGrabber = new HttpComms(impurl);
-
- while (!httpGrabber->isDone())
- {
- qApp->processEvents();
- usleep(10000);
- }
-
- QString impres;
-
- impres = httpGrabber->getData();
+ isbusy = true;
+ QString impres = HttpComms::getHttp(impsite);
+ isbusy = false;
beg = "<img SRC=\"posters/";
end = "\" ALT";
@@ -526,26 +558,11 @@
movieNumber = movieNum;
QString host = "www.imdb.com";
- QUrl url("http://" + host + "/title/tt" + movieNum + "/");
-
- //cout << "Grabbing Data From: " << url.toString() << endl;
-
- if (httpGrabber)
- {
- httpGrabber->stop();
- delete httpGrabber;
- }
-
- httpGrabber = new HttpComms(url);
-
- while (!httpGrabber->isDone())
- {
- qApp->processEvents();
- usleep(10000);
- }
-
- QString res;
- res = httpGrabber->getData();
+ QString url = "http://" + host + "/title/tt" + movieNum + "/";
+ //cout << "Grabbing Data From: " << url.latin1() << endl;
+ isbusy = true;
+ QString res = HttpComms::getHttp(url);
+ isbusy = false;
//cout << "Outputting Movie Data Page\n" << res << endl;
@@ -558,38 +575,27 @@
QString host = "us.imdb.com";
theMovieName = movieName;
- QUrl url("http://" + host + "/Tsearch?title=" + movieName +
- "&type=fuzzy&from_year=1890" +
- "&to_year=2010&sort=smart&tv=off&x=12&y=14");
-
- //cout << "Grabbing Listing From: " << url.toString() << endl;
-
- if (httpGrabber)
- {
- httpGrabber->stop();
- delete httpGrabber;
- }
-
- httpGrabber = new HttpComms(url);
-
- urlTimer->stop();
- urlTimer->start(10000);
-
- stopProcessing = false;
- while (!httpGrabber->isDone())
- {
- qApp->processEvents();
- if (stopProcessing)
- return 1;
- usleep(10000);
+ QString url = "http://" + host + "/Tsearch?title=" + movieName + IMDB_SEARCH_OPTIONS;
+ if (use_fuzzy_search) {
+ url += "&type=fuzzy";
+ }
+ if (debug > 0) {
+ cout << "Grabbing Listing From: " << url.latin1() << endl;
+ }
+ isbusy = true;
+ QString res = HttpComms::getHttp(url);
+ isbusy = false;
+
+ // If URL has been redirected to a movie then it was an only match
+ if (url.find("title/tt") != -1) {
+ int fnd = url.find("title/tt") + 8;
+ movieNumber = url.mid(fnd, url.findRev("/") - fnd);
+ return 1; // this does a re-request but simplest for now
}
- urlTimer->stop();
-
- QString res;
- res = httpGrabber->getData();
-
QString movies = parseData(res, "<A NAME=\"mov\">Movies</A></H2>", "</TABLE>");
+ QString popular = parseData(res, "<A NAME=\"top\">Most popular searches</A></H2>", "</OL>");
+ movies = popular + movies;
movieList.clear();
@@ -615,9 +621,9 @@
}
}
- movieList["manual"] = tr("Manually Enter IMDB #");
- movieList["reset"] = tr("Reset Entry");
- movieList["cancel"] = tr("Cancel");
+ movieList["manual"] = tr("002 Manually Enter IMDB #");
+ movieList["reset"] = tr("001 Reset Entry");
+ movieList["cancel"] = tr("000 Cancel");
ret = 2;
return ret;
@@ -836,8 +842,6 @@
QString title = "";
- QMap<QString, QString>::Iterator it;
-
LayerSet *container = NULL;
container = theme->GetSet("moviesel");
if (container)
@@ -849,19 +853,29 @@
ltype->ResetList();
ltype->SetActive(true);
- for (it = movieList.begin(); it != movieList.end(); ++it)
+ // build list for display sorted by rank
+ QStringList sortedList = movieList.values();
+ sortedList.sort();
+
+ for (int i = sortedList.count(); i > 0; i--)
{
+ QString movieTitle = sortedList[i-1];
+ bool isNum = FALSE;
+ movieTitle.left(4).toInt(&isNum);
+ if (isNum) {
+ movieTitle = movieTitle.mid(4); // remove the ranking
+ }
if (cnt < listsizeMovie)
{
if (pastSkip <= 0)
{
if (cnt == inListMovie)
{
- curitemMovie = (*it).data();
+ curitemMovie = movieTitle;
ltype->SetItemCurrent(cnt);
}
- ltype->SetItemText(cnt, 1, (*it).data());
+ ltype->SetItemText(cnt, 1, movieTitle);
cnt++;
listCountMovie++;
@@ -1098,7 +1112,6 @@
backup.end();
update(fullRect);
noUpdate = false;
- urlTimer->stop();
}
else
emit accept();
@@ -1548,9 +1561,17 @@
{
QMap<QString, QString>::Iterator it;
+ // determine which movie was selected
for (it = movieList.begin(); it != movieList.end(); ++it)
{
- if (curitemMovie == it.data())
+ QString movieTitle = it.data();
+ bool isNum = FALSE;
+ movieTitle.left(4).toInt(&isNum);
+ if (isNum) {
+ movieTitle = movieTitle.mid(4); // remove the ranking
+ }
+// if (curitemMovie == it.data())
+ if (curitemMovie == movieTitle)
{
movieNumber = it.key();
break;
@@ -1650,27 +1671,3 @@
curitem->updateDatabase(db);
RefreshMovieList();
}
-
-void VideoManager::GetMovieListingTimeOut()
-{
- //Increment the counter and check were not over the limit
- if(++GetMovieListingTimeoutCounter != 3)
- {
- //Try again
- GetMovieListing(theMovieName);
- }
- else
- {
- GetMovieListingTimeoutCounter = 0;
- cerr << "Failed to contact server" << endl;
-
- //Set the stopProcessing var so the other thread knows what to do
- stopProcessing = true;
-
- //Let the exitWin method take care of closing the dialog screen
- exitWin();
- }
-
- return;
-}
-
Index: mythvideo/mythvideo/videomanager.h
===================================================================
RCS file: /var/lib/mythcvs/mythvideo/mythvideo/videomanager.h,v
retrieving revision 1.7
diff -u -r1.7 videomanager.h
--- mythvideo/mythvideo/videomanager.h 7 Sep 2003 20:58:42 -0000 1.7
+++ mythvideo/mythvideo/videomanager.h 16 Oct 2003 06:37:47 -0000
@@ -45,7 +45,6 @@
void pageDown();
void pageUp();
void exitWin();
- void GetMovieListingTimeOut();
protected:
void paintEvent(QPaintEvent *);
@@ -58,6 +57,10 @@
private:
bool updateML;
bool noUpdate;
+ bool use_fuzzy_search;
+ bool ignore_tv_videos;
+ bool ignore_tv_series;
+ int debug;
QPixmap getPixmap(QString &level);
QSqlDatabase *db;
@@ -73,7 +76,6 @@
QMap<QString, QString> parseMovieList(QString);
void ResetCurrentItem();
- HttpComms *httpGrabber;
void RefreshMovieList();
QString ratingCountry;
void GetMovieData(QString);
@@ -132,12 +134,8 @@
QString movieRating;
int movieRuntime;
QString movieNumber;
-
- QTimer *urlTimer;
- int GetMovieListingTimeoutCounter;
- bool stopProcessing;
QString theMovieName;
-
+ bool isbusy;
bool allowselect;
};
More information about the mythtv-dev
mailing list