346
346
while(m_started && start > -1) {
347
347
// split title at parenthesis
348
348
const QString cap1 = s_anchorTitleRx->cap(1); // the anchor url
349
const QString cap2 = s_anchorTitleRx->cap(2); // the anchor text
349
const QString cap2 = s_anchorTitleRx->cap(2).stripWhiteSpace(); // the anchor text
350
350
start += s_anchorTitleRx->matchedLength();
351
351
int pPos = cap2.find('('); // if it has parentheses, use that for description
509
509
// find beginning of partial matches
510
int end = output.find(QString::fromLatin1("Other Results"), KMAX(pos, 0), false);
510
int end = output.find(QString::fromLatin1("Other Results"), QMAX(pos, 0), false);
512
end = output.find(QString::fromLatin1("Partial Matches"), KMAX(pos, 0), false);
512
end = output.find(QString::fromLatin1("Partial Matches"), QMAX(pos, 0), false);
514
end = output.find(QString::fromLatin1("Approx Matches"), KMAX(pos, 0), false);
514
end = output.find(QString::fromLatin1("Approx Matches"), QMAX(pos, 0), false);
516
516
end = output.length();
527
527
pos = s_anchorNameRx->search(output, pos+13);
528
528
while(pos > -1 && pos < end && m_matches.size() < m_limit) {
529
529
KURL u(m_url, s_anchorNameRx->cap(1));
530
s = s_anchorNameRx->cap(2) + ' ';
530
s = s_anchorNameRx->cap(2).stripWhiteSpace() + ' ';
531
531
// if more than one exact, add parentheses
532
532
if(nameMap.contains(s) && nameMap[s] > 0) {
533
533
// fix the first one that didn't have a number
551
551
pos = s_anchorNameRx->search(output, end);
552
552
while(pos > -1 && m_matches.size() < m_limit) {
553
553
KURL u(m_url, s_anchorNameRx->cap(1)); // relative URL
554
s = s_anchorNameRx->cap(2);
554
s = s_anchorNameRx->cap(2).stripWhiteSpace();
555
555
if(nameMap.contains(s) && nameMap[s] > 0) {
556
556
// fix the first one that didn't have a number
557
557
if(nameMap[s] == 1) {
734
void IMDBFetcher::doAspectRatio(const QString& str_, Data::EntryPtr entry_) {
735
QRegExp rx(QString::fromLatin1("aspect ratio:.*([\\d\\.]+\\s*:\\s*[\\d\\.]+)"), false);
738
if(rx.search(str_) > -1) {
739
// myDebug() << "aspect ratio = " << rx.cap(1) << endl;
740
entry_->setField(QString::fromLatin1("aspect-ratio"), rx.cap(1).stripWhiteSpace());
733
744
void IMDBFetcher::doAlsoKnownAs(const QString& str_, Data::EntryPtr entry_) {
734
745
if(m_fields.findIndex(QString::fromLatin1("alttitle")) == -1) {
840
851
pos = s_anchorRx->search(str_, pos+1);
841
852
while(pos > -1 && pos < endPos) {
842
853
if(s_anchorRx->cap(1).find(name) > -1) {
843
people += s_anchorRx->cap(2);
854
people.add(s_anchorRx->cap(2).stripWhiteSpace());
845
856
pos = s_anchorRx->search(str_, pos+1);
848
859
if(!people.isEmpty()) {
849
entry_->setField(fieldName_, people.join(sep));
860
entry_->setField(fieldName_, people.toList().join(sep));
918
929
// there's a column with ellipses then the character
919
930
const int pos2 = tdRx.search(castText, pos);
920
931
if(pos2 > -1 && tdRx.search(castText, pos2+1) > -1) {
921
cast += s_anchorRx->cap(2) + QString::fromLatin1("::") + tdRx.cap(1).simplifyWhiteSpace().remove(*s_tagRx);
932
cast += s_anchorRx->cap(2).stripWhiteSpace()
933
+ QString::fromLatin1("::") + tdRx.cap(1).simplifyWhiteSpace().remove(*s_tagRx);
923
cast += s_anchorRx->cap(2);
935
cast += s_anchorRx->cap(2).stripWhiteSpace();
926
938
pos = s_anchorRx->search(castText, pos+1);
1006
1018
const QString cert = QString::fromLatin1("certificates=");
1007
1019
const QString soundMix = QString::fromLatin1("sound-mix=");
1008
1020
const QString year = QString::fromLatin1("/Years/");
1021
const QString company = QString::fromLatin1("/company/");
1010
1023
// IMDb also has links with the word "sections" in them, remove that
1011
1024
// for genres and nationalities
1013
QStringList genres, countries, langs, certs, tracks;
1026
QStringList genres, countries, langs, certs, tracks, studios;
1014
1027
for(int pos = s_anchorRx->search(str_); pos > -1; pos = s_anchorRx->search(str_, pos+1)) {
1015
1028
const QString cap1 = s_anchorRx->cap(1);
1016
1029
if(cap1.find(genre) > -1) {
1017
1030
if(s_anchorRx->cap(2).find(QString::fromLatin1(" section"), 0, false) == -1) {
1018
genres += s_anchorRx->cap(2);
1031
genres += s_anchorRx->cap(2).stripWhiteSpace();
1020
1033
} else if(cap1.find(country) > -1) {
1021
1034
if(s_anchorRx->cap(2).find(QString::fromLatin1(" section"), 0, false) == -1) {
1022
countries += s_anchorRx->cap(2);
1035
countries += s_anchorRx->cap(2).stripWhiteSpace();
1024
1037
} else if(cap1.find(lang) > -1) {
1025
langs += s_anchorRx->cap(2);
1038
langs += s_anchorRx->cap(2).stripWhiteSpace();
1026
1039
} else if(cap1.find(colorInfo) > -1) {
1027
1040
// change "black and white" to "black & white"
1028
1041
entry_->setField(QString::fromLatin1("color"),
1029
s_anchorRx->cap(2).replace(QString::fromLatin1("and"), QChar('&')));
1042
s_anchorRx->cap(2).replace(QString::fromLatin1("and"), QChar('&')).stripWhiteSpace());
1030
1043
} else if(cap1.find(cert) > -1) {
1031
certs += s_anchorRx->cap(2);
1044
certs += s_anchorRx->cap(2).stripWhiteSpace();
1032
1045
} else if(cap1.find(soundMix) > -1) {
1033
tracks += s_anchorRx->cap(2);
1046
tracks += s_anchorRx->cap(2).stripWhiteSpace();
1047
} else if(cap1.find(company) > -1) {
1048
studios += s_anchorRx->cap(2).stripWhiteSpace();
1034
1049
// if year field wasn't set before, do it now
1035
1050
} else if(entry_->field(QString::fromLatin1("year")).isEmpty() && cap1.find(year) > -1) {
1036
entry_->setField(QString::fromLatin1("year"), s_anchorRx->cap(2));
1051
entry_->setField(QString::fromLatin1("year"), s_anchorRx->cap(2).stripWhiteSpace());
1041
1056
entry_->setField(QString::fromLatin1("nationality"), countries.join(sep));
1042
1057
entry_->setField(QString::fromLatin1("language"), langs.join(sep));
1043
1058
entry_->setField(QString::fromLatin1("audio-track"), tracks.join(sep));
1059
entry_->setField(QString::fromLatin1("studio"), studios.join(sep));
1044
1060
if(!certs.isEmpty()) {
1045
1061
// first try to set default certification
1046
1062
const QStringList& certsAllowed = entry_->collection()->fieldByName(QString::fromLatin1("certification"))->allowed();