2
* \file amazondialog.cpp
3
* Amazon database import dialog.
9
* Copyright (C) 2009 Urs Fleisch
11
* This file is part of Kid3.
13
* Kid3 is free software; you can redistribute it and/or modify
14
* it under the terms of the GNU General Public License as published by
15
* the Free Software Foundation; either version 2 of the License, or
16
* (at your option) any later version.
18
* Kid3 is distributed in the hope that it will be useful,
19
* but WITHOUT ANY WARRANTY; without even the implied warranty of
20
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
21
* GNU General Public License for more details.
23
* You should have received a copy of the GNU General Public License
24
* along with this program. If not, see <http://www.gnu.org/licenses/>.
30
#include "amazonclient.h"
31
#include "amazondialog.h"
33
static const char* serverList[] = {
34
// Parsing only works with English text
36
"www.amazon.co.uk:80",
40
static const ImportSourceDialog::Properties props = {
45
&Kid3App::s_amazonCfg,
53
* @param parent parent widget
54
* @param trackDataVector track data to be filled with imported values
56
AmazonDialog::AmazonDialog(
58
ImportTrackDataVector& trackDataVector)
59
: ImportSourceDialog(parent, "Amazon", trackDataVector,
60
new AmazonClient, props)
67
AmazonDialog::~AmazonDialog()
72
* Replace HTML entities in a string.
74
* @param str string with HTML entities (e.g. ")
76
* @return string with replaced HTML entities.
78
static QString replaceHtmlEntities(QString str)
80
str.replace(""", "\"");
81
str.replace(" ", " ");
82
str.replace("<", "<");
83
str.replace(">", ">");
84
str.replace("&", "&");
89
* Replace HTML entities and remove HTML tags.
91
* @param str string containing HTML
93
* @return clean up string
95
static QString removeHtml(QString str)
97
QRegExp htmlTagRe("<[^>]+>");
98
return replaceHtmlEntities(str.remove(htmlTagRe)).QCM_trimmed();
102
* Process the data received from a finished find (search) request.
104
* @param searchStr search data received
106
void AmazonDialog::parseFindResults(const QByteArray& searchStr)
108
/* products have the following format (depending on browser):
109
<td class="dataColumn"><table cellpadding="0" cellspacing="0" border="0"><tr><td>
110
<a href="http://www.amazon.com/Avenger-Amon-Amarth/dp/B001VROVHO/ref=sr_1_1/178-1209985-8853325?ie=UTF8&s=music&qid=1260707733&sr=1-1"><span class="srTitle">The Avenger</span></a>
111
by <a href="/Amon-Amarth/e/B000APIBHO/ref=sr_ntt_srch_lnk1/178-1209985-8853325?_encoding=UTF8&qid=1260707733&sr=1-1">Amon Amarth</a> <span class="bindingBlock">(<span class="binding">Audio CD</span> - 2009)</span> - <span class="formatText">Original recording reissued</span></td></tr>
114
<div class="productTitle"><a href="http://www.amazon.com/Avenger-Amon-Amarth/dp/B001VROVHO/ref=sr_1_1?ie=UTF8&s=music&qid=1260607141&sr=1-1"> The Avenger</a> <span class="ptBrand">by <a href="/Amon-Amarth/e/B000APIBHO/ref=sr_ntt_srch_lnk_1?_encoding=UTF8&qid=1260607141&sr=1-1">Amon Amarth</a></span><span class="binding"> (<span class="format">Audio CD</span> - 2009)</span> - <span class="format">Original recording reissued</span></div>
116
QString str = QString::fromLatin1(searchStr);
117
QRegExp catIdTitleArtistRe(
118
"<a href=\"[^\"]+/(dp|ASIN|images|product|-)/([A-Z0-9]+)[^\"]+\">"
119
"<span class=\"srTitle\">([^<]+)<.*>\\s*by\\s*(?:<[^>]+>)?([^<]+)<");
120
QStringList lines = QCM_split(QRegExp("\\n{2,}"), str.remove('\r'));
121
m_albumListBox->clear();
122
for (QStringList::const_iterator it = lines.begin(); it != lines.end(); ++it) {
125
if (catIdTitleArtistRe.QCM_indexIn(line) != -1) {
128
removeHtml(catIdTitleArtistRe.cap(4)) + " - " +
129
removeHtml(catIdTitleArtistRe.cap(3)),
130
catIdTitleArtistRe.cap(1),
131
catIdTitleArtistRe.cap(2));
134
m_albumListBox->setFocus();
138
* Parse result of album request and populate m_trackDataVector with results.
140
* @param albumStr album data received
142
void AmazonDialog::parseAlbumResults(const QByteArray& albumStr)
145
title (empty lines removed):
146
<div class="buying"><h1 class="parseasinTitle"><span id="btAsinTitle" style="">Avenger</span></h1>
148
<a href="/Amon-Amarth/e/B000APIBHO/ref=ntt_mus_dp_pel">Amon Amarth</a>
152
details (empty lines removed):
153
<a name="productDetails" id="productDetails"></a>
154
<hr noshade="noshade" size="1" class="bucketDivider" />
155
<table cellpadding="0" cellspacing="0" border="0">
158
<h2>Product Details</h2>
159
<div class="content">
161
<li><b>Audio CD</b> (November 2, 1999)</li>
162
<li><b>Original Release Date:</b> November 2, 1999</li>
163
<li><b>Number of Discs:</b> 1</li>
164
<li><b>Label:</b> Metal Blade</li>
167
<tr class='rowEven'><td class="playCol"><a href="/gp/dmusic/media/sample.m3u/ref=dm_mu_dp_trk1_smpl/175-1810673-7649752?ie=UTF8&catalogItemType=track&ASIN=B0016OAHCK&DownloadLocation=CD" onclick='return cd_trackPreviewPressed("B0016OAHCK");'><img src="http://g-ecx.images-amazon.com/images/G/01/digital/music/dp/play-control-2._V223646478_.gif" width="19" alt="listen" id="cd_trackPreviewB0016OAHCK" title="listen" height="19" border="0" /></a></td><td class="titleCol"> 1. <a href="http://www.amazon.com/gp/product/B0016OAHCK/ref=dm_mu_dp_trk1/175-1810673-7649752">Bleed For Ancient Gods</a></td><td class="runtimeCol"> 4:31</td><td class="priceCol">$0.99</td><td class="buyCol">
169
alternatively (empty lines removed):
170
<tr class="listRowEven">
172
1. Before the Devil Knows You're Dead
175
QString str = QString::fromLatin1(albumStr);
176
FrameCollection framesHdr;
177
// search for 'id="btAsinTitle"', text after '>' until ' [' or '<' => album
179
int start = str.QCM_indexOf("id=\"btAsinTitle\"");
181
start = str.QCM_indexOf(">", start);
183
end = str.QCM_indexOf("<", start);
185
int bracketPos = str.QCM_indexOf(" [", start);
186
if (bracketPos >= 0 && bracketPos < end) {
190
replaceHtmlEntities(str.mid(start + 1, end - start - 1)));
192
// next '<a href=', text after '>' until '<' => artist
193
start = str.QCM_indexOf("<a href=", end);
195
start = str.QCM_indexOf(">", start);
197
end = str.QCM_indexOf("<", start);
200
replaceHtmlEntities(str.mid(start + 1, end - start - 1)));
208
// search for >Product Details<, >Original Release Date:<, >Label:<
209
const bool additionalTags = getAdditionalTags();
211
start = str.QCM_indexOf(">Product Details<");
213
int detailStart = str.QCM_indexOf(">Original Release Date:<", start);
214
if (detailStart < 0) {
215
detailStart = str.QCM_indexOf(">Audio CD<", start);
217
if (detailStart >= 0) {
218
int detailEnd = str.QCM_indexOf("\n", detailStart + 10);
219
if (detailEnd > detailStart + 10) {
220
QRegExp yearRe("(\\d{4})");
221
if (yearRe.QCM_indexIn(
222
str.mid(detailStart + 10, detailEnd - detailStart - 11)) >= 0) {
223
framesHdr.setYear(yearRe.cap(1).toInt());
227
if (additionalTags) {
228
detailStart = str.QCM_indexOf(">Label:<", start);
229
if (detailStart > 0) {
230
int detailEnd = str.QCM_indexOf("\n", detailStart + 8);
231
if (detailEnd > detailStart + 8) {
232
QRegExp labelRe(">\\s*([^<]+)<");
233
if (labelRe.QCM_indexIn(
234
str.mid(detailStart + 8, detailEnd - detailStart - 9)) >= 0) {
235
framesHdr.setValue(Frame::FT_Publisher, removeHtml(labelRe.cap(1)));
239
detailStart = str.QCM_indexOf(">Performer:<", start);
240
if (detailStart > 0) {
241
int detailEnd = str.QCM_indexOf("</li>", detailStart + 12);
242
if (detailEnd > detailStart + 12) {
245
removeHtml(str.mid(detailStart + 11, detailEnd - detailStart - 11)));
248
detailStart = str.QCM_indexOf(">Orchestra:<", start);
249
if (detailStart > 0) {
250
int detailEnd = str.QCM_indexOf("</li>", detailStart + 12);
251
if (detailEnd > detailStart + 12) {
253
removeHtml(str.mid(detailStart + 11, detailEnd - detailStart - 11));
256
detailStart = str.QCM_indexOf(">Conductor:<", start);
257
if (detailStart > 0) {
258
int detailEnd = str.QCM_indexOf("</li>", detailStart + 12);
259
if (detailEnd > detailStart + 12) {
262
removeHtml(str.mid(detailStart + 11, detailEnd - detailStart - 11)));
265
detailStart = str.QCM_indexOf(">Composer:<", start);
266
if (detailStart > 0) {
267
int detailEnd = str.QCM_indexOf("</li>", detailStart + 11);
268
if (detailEnd > detailStart + 11) {
271
removeHtml(str.mid(detailStart + 10, detailEnd - detailStart - 10)));
278
// <input type="hidden" id="ASIN" name="ASIN" value="B0025AY48W" />
279
start = str.QCM_indexOf("id=\"ASIN\"");
281
start = str.QCM_indexOf("value=\"", start);
283
end = str.QCM_indexOf("\"", start + 7);
285
m_trackDataVector.setCoverArtUrl(
286
QString("http://www.amazon.com/dp/") +
287
str.mid(start + 7, end - start - 7));
293
bool hasTitleCol = false;
294
bool hasArtist = str.QCM_indexOf("<td>Song Title</td><td>Artist</td>") != -1;
295
// search 'class="titleCol"', next '<a href=', text after '>' until '<'
297
// if not found: alternatively look for 'class="listRow'
298
start = str.QCM_indexOf("class=\"titleCol\"");
302
start = str.QCM_indexOf("class=\"listRow");
305
QRegExp durationRe("(\\d+):(\\d+)");
306
QRegExp nrTitleRe("\\s*\\d+\\.\\s+(.*\\S)");
307
FrameCollection frames(framesHdr);
308
ImportTrackDataVector::iterator it = m_trackDataVector.begin();
309
bool atTrackDataListEnd = (it == m_trackDataVector.end());
316
end = str.QCM_indexOf("\n", start);
318
QString line = str.mid(start, end - start);
319
int titleStart = line.QCM_indexOf("<a href=");
320
if (titleStart >= 0) {
321
titleStart = line.QCM_indexOf(">", titleStart);
322
if (titleStart >= 0) {
323
int titleEnd = line.QCM_indexOf("<", titleStart);
324
if (titleEnd > titleStart) {
325
title = line.mid(titleStart + 1, titleEnd - titleStart - 1);
326
// if there was an Artist title,
327
// search for artist in a second titleCol
330
line.QCM_indexOf("class=\"titleCol\"", titleEnd);
331
if (artistStart >= 0) {
332
artistStart = line.QCM_indexOf("<a href=", artistStart);
333
if (artistStart >= 0) {
334
artistStart = line.QCM_indexOf(">", artistStart);
335
if (artistStart >= 0) {
336
int artistEnd = line.QCM_indexOf("<", artistStart);
337
if (artistEnd > artistStart) {
339
artistStart + 1, artistEnd - artistStart - 1);
340
if (albumArtist.isEmpty()) {
341
albumArtist = frames.getArtist();
348
// search for next 'class="', if it is 'class="runtimeCol"',
349
// text after '>' until '<' => duration
351
line.QCM_indexOf("class=\"runtimeCol\"", titleEnd);
352
if (runtimeStart >= 0) {
353
runtimeStart = line.QCM_indexOf(">", runtimeStart + 18);
354
if (runtimeStart >= 0) {
355
int runtimeEnd = line.QCM_indexOf("<", runtimeStart);
356
if (runtimeEnd > runtimeStart) {
357
if (durationRe.QCM_indexIn(
358
line.mid(runtimeStart + 1,
359
runtimeEnd - runtimeStart - 1)) >= 0) {
360
duration = durationRe.cap(1).toInt() * 60 +
361
durationRe.cap(2).toInt();
366
start = str.QCM_indexOf("class=\"titleCol\"", end);
374
// 'class="listRow' found
375
start = str.QCM_indexOf("<td>", start);
377
end = str.QCM_indexOf("</td>", start);
379
nrTitleRe.QCM_indexIn(str.mid(start + 4, end - start - 4)) >= 0) {
380
title = nrTitleRe.cap(1);
381
start = str.QCM_indexOf("class=\"listRow", end);
387
if (!title.isEmpty()) {
388
frames.setTitle(replaceHtmlEntities(title));
389
if (!artist.isEmpty()) {
390
frames.setArtist(replaceHtmlEntities(artist));
392
if (!albumArtist.isEmpty() && additionalTags) {
393
frames.setValue(Frame::FT_AlbumArtist, albumArtist);
395
frames.setTrack(trackNr);
396
if (atTrackDataListEnd) {
397
ImportTrackData trackData;
398
trackData.setFrameCollection(frames);
399
trackData.setImportDuration(duration);
400
m_trackDataVector.push_back(trackData);
402
(*it).setFrameCollection(frames);
403
(*it).setImportDuration(duration);
405
atTrackDataListEnd = (it == m_trackDataVector.end());
412
// handle redundant tracks
414
while (!atTrackDataListEnd) {
415
if ((*it).getFileDuration() == 0) {
416
it = m_trackDataVector.erase(it);
418
(*it).setFrameCollection(frames);
419
(*it).setImportDuration(0);
422
atTrackDataListEnd = (it == m_trackDataVector.end());
424
} else if (!framesHdr.empty()) {
425
// if there are no track data, fill frame header data
426
for (ImportTrackDataVector::iterator it = m_trackDataVector.begin();
427
it != m_trackDataVector.end();
429
(*it).setFrameCollection(framesHdr);