~ubuntu-branches/ubuntu/wily/kid3/wily-proposed

« back to all changes in this revision

Viewing changes to kid3/amazondialog.cpp

  • Committer: Bazaar Package Importer
  • Author(s): Jonathan Riddell
  • Date: 2010-05-26 17:57:40 UTC
  • mfrom: (1.1.11 upstream) (2.1.8 sid)
  • Revision ID: james.westby@ubuntu.com-20100526175740-c86815ej5196z3of
Tags: 1.4-1ubuntu1
* Merge with Debian, remaining changes:
  + debian/control:
    - Build-depend on libmp4v2-dev.
  + debian/rules:
    - Build WITH_MP4V2.
* Add kubuntu_01_fix_docs.diff to fix compile with KDE 4.5 and
  apply in debian/rules

Show diffs side-by-side

added added

removed removed

Lines of Context:
 
1
/**
 
2
 * \file amazondialog.cpp
 
3
 * Amazon database import dialog.
 
4
 *
 
5
 * \b Project: Kid3
 
6
 * \author Urs Fleisch
 
7
 * \date 13 Dec 2009
 
8
 *
 
9
 * Copyright (C) 2009  Urs Fleisch
 
10
 *
 
11
 * This file is part of Kid3.
 
12
 *
 
13
 * Kid3 is free software; you can redistribute it and/or modify
 
14
 * it under the terms of the GNU General Public License as published by
 
15
 * the Free Software Foundation; either version 2 of the License, or
 
16
 * (at your option) any later version.
 
17
 *
 
18
 * Kid3 is distributed in the hope that it will be useful,
 
19
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 
20
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 
21
 * GNU General Public License for more details.
 
22
 *
 
23
 * You should have received a copy of the GNU General Public License
 
24
 * along with this program.  If not, see <http://www.gnu.org/licenses/>.
 
25
 */
 
26
 
 
27
#include <qregexp.h>
 
28
#include <qdom.h>
 
29
#include "kid3.h"
 
30
#include "amazonclient.h"
 
31
#include "amazondialog.h"
 
32
 
 
33
/**
 * Servers offered for the Amazon import, each given as "host:port".
 * Only English-language sites are listed because the result parsing
 * only works with English text. The list is terminated by a 0 entry.
 */
static const char* serverList[] = {
	"www.amazon.com:80",
	"www.amazon.co.uk:80",
	0  // end of list marker
};
 
39
 
 
40
static const ImportSourceDialog::Properties props = {
 
41
        serverList,
 
42
        "www.amazon.com:80",
 
43
        0,
 
44
        "import-amazon",
 
45
        &Kid3App::s_amazonCfg,
 
46
        true
 
47
};
 
48
 
 
49
 
 
50
/**
 
51
 * Constructor.
 
52
 *
 
53
 * @param parent          parent widget
 
54
 * @param trackDataVector track data to be filled with imported values
 
55
 */
 
56
AmazonDialog::AmazonDialog(
 
57
        QWidget* parent,
 
58
        ImportTrackDataVector& trackDataVector)
 
59
        : ImportSourceDialog(parent, "Amazon", trackDataVector,
 
60
                                                                                         new AmazonClient, props)
 
61
{
 
62
}
 
63
 
 
64
/**
 
65
 * Destructor.
 
66
 */
 
67
AmazonDialog::~AmazonDialog()
 
68
{
 
69
}
 
70
 
 
71
/**
 
72
 * Replace HTML entities in a string.
 
73
 *
 
74
 * @param str string with HTML entities (e.g. &quot;)
 
75
 *
 
76
 * @return string with replaced HTML entities.
 
77
 */
 
78
static QString replaceHtmlEntities(QString str)
 
79
{
 
80
        str.replace("&quot;", "\"");
 
81
        str.replace("&nbsp;", " ");
 
82
        str.replace("&lt;", "<");
 
83
        str.replace("&gt;", ">");
 
84
        str.replace("&amp;", "&");
 
85
        return str;
 
86
}
 
87
 
 
88
/**
 
89
 * Replace HTML entities and remove HTML tags.
 
90
 *
 
91
 * @param str string containing HTML
 
92
 *
 
93
 * @return clean up string
 
94
 */
 
95
static QString removeHtml(QString str)
 
96
{
 
97
        QRegExp htmlTagRe("<[^>]+>");
 
98
        return replaceHtmlEntities(str.remove(htmlTagRe)).QCM_trimmed();
 
99
}
 
100
 
 
101
/**
 
102
 * Process finished findCddbAlbum request.
 
103
 *
 
104
 * @param searchStr search data received
 
105
 */
 
106
void AmazonDialog::parseFindResults(const QByteArray& searchStr)
 
107
{
 
108
        /* products have the following format (depending on browser):
 
109
<td class="dataColumn"><table cellpadding="0" cellspacing="0" border="0"><tr><td>                      
 
110
<a href="http://www.amazon.com/Avenger-Amon-Amarth/dp/B001VROVHO/ref=sr_1_1/178-1209985-8853325?ie=UTF8&s=music&qid=1260707733&sr=1-1"><span class="srTitle">The Avenger</span></a>                           
 
111
   by <a href="/Amon-Amarth/e/B000APIBHO/ref=sr_ntt_srch_lnk1/178-1209985-8853325?_encoding=UTF8&amp;qid=1260707733&amp;sr=1-1">Amon Amarth</a> <span class="bindingBlock">(<span class="binding">Audio CD</span> - 2009)</span> - <span class="formatText">Original recording reissued</span></td></tr>             
 
112
<td></td>                                                                                              
 
113
           or:
 
114
<div class="productTitle"><a href="http://www.amazon.com/Avenger-Amon-Amarth/dp/B001VROVHO/ref=sr_1_1?ie=UTF8&s=music&qid=1260607141&sr=1-1"> The Avenger</a> <span class="ptBrand">by <a href="/Amon-Amarth/e/B000APIBHO/ref=sr_ntt_srch_lnk_1?_encoding=UTF8&amp;qid=1260607141&amp;sr=1-1">Amon Amarth</a></span><span class="binding"> (<span class="format">Audio CD</span> - 2009)</span> - <span class="format">Original recording reissued</span></div>
 
115
         */
 
116
        QString str = QString::fromLatin1(searchStr);
 
117
        QRegExp catIdTitleArtistRe(
 
118
                "<a href=\"[^\"]+/(dp|ASIN|images|product|-)/([A-Z0-9]+)[^\"]+\">"
 
119
                "<span class=\"srTitle\">([^<]+)<.*>\\s*by\\s*(?:<[^>]+>)?([^<]+)<");
 
120
        QStringList lines = QCM_split(QRegExp("\\n{2,}"), str.remove('\r'));
 
121
        m_albumListBox->clear();
 
122
        for (QStringList::const_iterator it = lines.begin(); it != lines.end(); ++it) {
 
123
                QString line(*it);
 
124
                line.remove('\n');
 
125
                if (catIdTitleArtistRe.QCM_indexIn(line) != -1) {
 
126
                        new AlbumListItem(
 
127
                                m_albumListBox,
 
128
                                removeHtml(catIdTitleArtistRe.cap(4)) + " - " +
 
129
                                removeHtml(catIdTitleArtistRe.cap(3)),
 
130
                                catIdTitleArtistRe.cap(1),
 
131
                                catIdTitleArtistRe.cap(2));
 
132
                }
 
133
        }
 
134
        m_albumListBox->setFocus();
 
135
}
 
136
 
 
137
/**
 
138
 * Parse result of album request and populate m_trackDataVector with results.
 
139
 *
 
140
 * @param albumStr album data received
 
141
 */
 
142
void AmazonDialog::parseAlbumResults(const QByteArray& albumStr)
 
143
{
 
144
        /*
 
145
                title (empty lines removed):
 
146
<div class="buying"><h1 class="parseasinTitle"><span id="btAsinTitle" style="">Avenger</span></h1>
 
147
<span >
 
148
<a href="/Amon-Amarth/e/B000APIBHO/ref=ntt_mus_dp_pel">Amon Amarth</a>
 
149
</span>
 
150
</div>
 
151
 
 
152
                details (empty lines removed):
 
153
<a name="productDetails" id="productDetails"></a>
 
154
<hr noshade="noshade" size="1" class="bucketDivider" />
 
155
<table cellpadding="0" cellspacing="0" border="0">     
 
156
  <tr>                                                 
 
157
    <td class="bucket">                                
 
158
<h2>Product Details</h2>
 
159
  <div class="content"> 
 
160
<ul>
 
161
<li><b>Audio CD</b>  (November 2, 1999)</li>
 
162
<li><b>Original Release Date:</b> November 2, 1999</li>
 
163
<li><b>Number of Discs:</b> 1</li>
 
164
<li><b>Label:</b> Metal Blade</li>
 
165
 
 
166
                tracks:
 
167
<tr class='rowEven'><td class="playCol"><a href="/gp/dmusic/media/sample.m3u/ref=dm_mu_dp_trk1_smpl/175-1810673-7649752?ie=UTF8&catalogItemType=track&ASIN=B0016OAHCK&DownloadLocation=CD" onclick='return cd_trackPreviewPressed("B0016OAHCK");'><img src="http://g-ecx.images-amazon.com/images/G/01/digital/music/dp/play-control-2._V223646478_.gif" width="19" alt="listen" id="cd_trackPreviewB0016OAHCK" title="listen" height="19" border="0" /></a></td><td class="titleCol">&nbsp; 1. <a href="http://www.amazon.com/gp/product/B0016OAHCK/ref=dm_mu_dp_trk1/175-1810673-7649752">Bleed For Ancient Gods</a></td><td class="runtimeCol"> 4:31</td><td class="priceCol">$0.99</td><td class="buyCol">
 
168
 
 
169
    alternatively (empty lines removed):
 
170
<tr class="listRowEven">
 
171
<td>
 
172
1. Before the Devil Knows You're Dead
 
173
</td>                                
 
174
         */
 
175
        QString str = QString::fromLatin1(albumStr);
 
176
        FrameCollection framesHdr;
 
177
        // search for 'id="btAsinTitle"', text after '>' until ' [' or '<' => album
 
178
        int end = 0;
 
179
        int start = str.QCM_indexOf("id=\"btAsinTitle\"");
 
180
        if (start >= 0) {
 
181
                start = str.QCM_indexOf(">", start);
 
182
                if (start >= 0) {
 
183
                        end = str.QCM_indexOf("<", start);
 
184
                        if (end > start) {
 
185
                                int bracketPos = str.QCM_indexOf(" [", start);
 
186
                                if (bracketPos >= 0 && bracketPos < end) {
 
187
                                        end = bracketPos;
 
188
                                }
 
189
                                framesHdr.setAlbum(
 
190
                                        replaceHtmlEntities(str.mid(start + 1, end - start - 1)));
 
191
 
 
192
                                // next '<a href=', text after '>' until '<' => artist
 
193
                                start = str.QCM_indexOf("<a href=", end);
 
194
                                if (start >= 0) {
 
195
                                        start = str.QCM_indexOf(">", start);
 
196
                                        if (start >= 0) {
 
197
                                                end = str.QCM_indexOf("<", start);
 
198
                                                if (end > start) {
 
199
                                                        framesHdr.setArtist(
 
200
                                                                replaceHtmlEntities(str.mid(start + 1, end - start - 1)));
 
201
                                                }
 
202
                                        }
 
203
                                }
 
204
                        }
 
205
                }
 
206
        }
 
207
        
 
208
        // search for >Product Details<, >Original Release Date:<, >Label:<
 
209
        const bool additionalTags = getAdditionalTags();
 
210
        QString albumArtist;
 
211
        start = str.QCM_indexOf(">Product Details<");
 
212
        if (start >= 0) {
 
213
                int detailStart = str.QCM_indexOf(">Original Release Date:<", start);
 
214
                if (detailStart < 0) {
 
215
                        detailStart  = str.QCM_indexOf(">Audio CD<", start);
 
216
                }
 
217
                if (detailStart >= 0) {
 
218
                        int detailEnd = str.QCM_indexOf("\n", detailStart + 10);
 
219
                        if (detailEnd > detailStart + 10) {
 
220
                                QRegExp yearRe("(\\d{4})");
 
221
                                if (yearRe.QCM_indexIn(
 
222
                                                        str.mid(detailStart + 10, detailEnd - detailStart - 11)) >= 0) {
 
223
                                        framesHdr.setYear(yearRe.cap(1).toInt());
 
224
                                }
 
225
                        }
 
226
                }
 
227
                if (additionalTags) {
 
228
                        detailStart = str.QCM_indexOf(">Label:<", start);
 
229
                        if (detailStart > 0) {
 
230
                                int detailEnd = str.QCM_indexOf("\n", detailStart + 8);
 
231
                                if (detailEnd > detailStart + 8) {
 
232
                                        QRegExp labelRe(">\\s*([^<]+)<");
 
233
                                        if (labelRe.QCM_indexIn(
 
234
                                                                str.mid(detailStart + 8, detailEnd - detailStart - 9)) >= 0) {
 
235
                                                framesHdr.setValue(Frame::FT_Publisher, removeHtml(labelRe.cap(1)));
 
236
                                        }
 
237
                                }
 
238
                        }
 
239
                        detailStart = str.QCM_indexOf(">Performer:<", start);
 
240
                        if (detailStart > 0) {
 
241
                                int detailEnd = str.QCM_indexOf("</li>", detailStart + 12);
 
242
                                if (detailEnd > detailStart + 12) {
 
243
                                        framesHdr.setValue(
 
244
                                                Frame::FT_Performer,
 
245
                                                removeHtml(str.mid(detailStart + 11, detailEnd - detailStart - 11)));
 
246
                                }
 
247
                        }
 
248
                        detailStart = str.QCM_indexOf(">Orchestra:<", start);
 
249
                        if (detailStart > 0) {
 
250
                                int detailEnd = str.QCM_indexOf("</li>", detailStart + 12);
 
251
                                if (detailEnd > detailStart + 12) {
 
252
                                        albumArtist =
 
253
                                                removeHtml(str.mid(detailStart + 11, detailEnd - detailStart - 11));
 
254
                                }
 
255
                        }
 
256
                        detailStart = str.QCM_indexOf(">Conductor:<", start);
 
257
                        if (detailStart > 0) {
 
258
                                int detailEnd = str.QCM_indexOf("</li>", detailStart + 12);
 
259
                                if (detailEnd > detailStart + 12) {
 
260
                                        framesHdr.setValue(
 
261
                                                Frame::FT_Conductor,
 
262
                                                removeHtml(str.mid(detailStart + 11, detailEnd - detailStart - 11)));
 
263
                                }
 
264
                        }
 
265
                        detailStart = str.QCM_indexOf(">Composer:<", start);
 
266
                        if (detailStart > 0) {
 
267
                                int detailEnd = str.QCM_indexOf("</li>", detailStart + 11);
 
268
                                if (detailEnd > detailStart + 11) {
 
269
                                        framesHdr.setValue(
 
270
                                                Frame::FT_Composer,
 
271
                                                removeHtml(str.mid(detailStart + 10, detailEnd - detailStart - 10)));
 
272
                                }
 
273
                        }
 
274
                }
 
275
        }
 
276
 
 
277
        if (getCoverArt()) {
 
278
                // <input type="hidden" id="ASIN" name="ASIN" value="B0025AY48W" />
 
279
                start = str.QCM_indexOf("id=\"ASIN\"");
 
280
                if (start > 0) {
 
281
                        start = str.QCM_indexOf("value=\"", start);
 
282
                        if (start > 0) {
 
283
                                end = str.QCM_indexOf("\"", start + 7);
 
284
                                if (end > start) {
 
285
                                        m_trackDataVector.setCoverArtUrl(
 
286
                                                QString("http://www.amazon.com/dp/") +
 
287
                                                str.mid(start + 7, end - start - 7));
 
288
                                }
 
289
                        }
 
290
                }
 
291
        }
 
292
 
 
293
        bool hasTitleCol = false;
 
294
        bool hasArtist = str.QCM_indexOf("<td>Song Title</td><td>Artist</td>") != -1;
 
295
        // search 'class="titleCol"', next '<a href=', text after '>' until '<'
 
296
        // => title
 
297
        // if not found: alternatively look for 'class="listRow'
 
298
        start = str.QCM_indexOf("class=\"titleCol\"");
 
299
        if (start >= 0) {
 
300
                hasTitleCol = true;
 
301
        } else {
 
302
                start = str.QCM_indexOf("class=\"listRow");
 
303
        }
 
304
        if (start >= 0) {
 
305
                QRegExp durationRe("(\\d+):(\\d+)");
 
306
                QRegExp nrTitleRe("\\s*\\d+\\.\\s+(.*\\S)");
 
307
                FrameCollection frames(framesHdr);
 
308
                ImportTrackDataVector::iterator it = m_trackDataVector.begin();
 
309
                bool atTrackDataListEnd = (it == m_trackDataVector.end());
 
310
                int trackNr = 1;
 
311
                while (start >= 0) {
 
312
                        QString title;
 
313
                        QString artist;
 
314
                        int duration = 0;
 
315
                        if (hasTitleCol) {
 
316
                                end = str.QCM_indexOf("\n", start);
 
317
                                if (end > start) {
 
318
                                        QString line = str.mid(start, end - start);
 
319
                                        int titleStart = line.QCM_indexOf("<a href=");
 
320
                                        if (titleStart >= 0) {
 
321
                                                titleStart = line.QCM_indexOf(">", titleStart);
 
322
                                                if (titleStart >= 0) {
 
323
                                                        int titleEnd = line.QCM_indexOf("<", titleStart);
 
324
                                                        if (titleEnd > titleStart) {
 
325
                                                                title = line.mid(titleStart + 1, titleEnd - titleStart - 1);
 
326
                                                                // if there was an Artist title,
 
327
                                                                // search for artist in a second titleCol
 
328
                                                                if (hasArtist) {
 
329
                                                                        int artistStart =
 
330
                                                                                line.QCM_indexOf("class=\"titleCol\"", titleEnd);
 
331
                                                                        if (artistStart >= 0) {
 
332
                                                                                artistStart = line.QCM_indexOf("<a href=", artistStart);
 
333
                                                                                if (artistStart >= 0) {
 
334
                                                                                        artistStart = line.QCM_indexOf(">", artistStart);
 
335
                                                                                        if (artistStart >= 0) {
 
336
                                                                                                int artistEnd = line.QCM_indexOf("<", artistStart);
 
337
                                                                                                if (artistEnd > artistStart) {
 
338
                                                                                                        artist = line.mid(
 
339
                                                                                                                artistStart + 1, artistEnd - artistStart - 1);
 
340
                                                                                                        if (albumArtist.isEmpty()) {
 
341
                                                                                                                albumArtist = frames.getArtist();
 
342
                                                                                                        }
 
343
                                                                                                }
 
344
                                                                                        }
 
345
                                                                                }
 
346
                                                                        }
 
347
                                                                }
 
348
                                                                // search for next 'class="', if it is 'class="runtimeCol"',
 
349
                                                                // text after '>' until '<' => duration
 
350
                                                                int runtimeStart =
 
351
                                                                        line.QCM_indexOf("class=\"runtimeCol\"", titleEnd);
 
352
                                                                if (runtimeStart >= 0) {
 
353
                                                                        runtimeStart = line.QCM_indexOf(">", runtimeStart + 18);
 
354
                                                                        if (runtimeStart >= 0) {
 
355
                                                                                int runtimeEnd = line.QCM_indexOf("<", runtimeStart);
 
356
                                                                                if (runtimeEnd > runtimeStart) {
 
357
                                                                                        if (durationRe.QCM_indexIn(
 
358
                                                                                                                line.mid(runtimeStart + 1,
 
359
                                                                                                                                                 runtimeEnd - runtimeStart - 1)) >= 0) {
 
360
                                                                                                duration = durationRe.cap(1).toInt() * 60 +
 
361
                                                                                                        durationRe.cap(2).toInt();
 
362
                                                                                        }
 
363
                                                                                }
 
364
                                                                        }
 
365
                                                                }
 
366
                                                                start = str.QCM_indexOf("class=\"titleCol\"", end);
 
367
                                                        } else {
 
368
                                                                start = -1;
 
369
                                                        }
 
370
                                                }
 
371
                                        }
 
372
                                }
 
373
                        } else {
 
374
                                // 'class="listRow' found
 
375
                                start = str.QCM_indexOf("<td>", start);
 
376
                                if (start >= 0) {
 
377
                                        end = str.QCM_indexOf("</td>", start);
 
378
                                        if (end > start &&
 
379
                                                        nrTitleRe.QCM_indexIn(str.mid(start + 4, end - start - 4)) >= 0) {
 
380
                                                title = nrTitleRe.cap(1);
 
381
                                                start = str.QCM_indexOf("class=\"listRow", end);
 
382
                                        } else {
 
383
                                                start = -1; 
 
384
                                        }
 
385
                                }
 
386
                        }
 
387
                        if (!title.isEmpty()) {
 
388
                                frames.setTitle(replaceHtmlEntities(title));
 
389
                                if (!artist.isEmpty()) {
 
390
                                        frames.setArtist(replaceHtmlEntities(artist));
 
391
                                }
 
392
                                if (!albumArtist.isEmpty() && additionalTags) {
 
393
                                        frames.setValue(Frame::FT_AlbumArtist, albumArtist);
 
394
                                }
 
395
                                frames.setTrack(trackNr);
 
396
                                if (atTrackDataListEnd) {
 
397
                                        ImportTrackData trackData;
 
398
                                        trackData.setFrameCollection(frames);
 
399
                                        trackData.setImportDuration(duration);
 
400
                                        m_trackDataVector.push_back(trackData);
 
401
                                } else {
 
402
                                        (*it).setFrameCollection(frames);
 
403
                                        (*it).setImportDuration(duration);
 
404
                                        ++it;
 
405
                                        atTrackDataListEnd = (it == m_trackDataVector.end());
 
406
                                }
 
407
                                ++trackNr;
 
408
                                frames = framesHdr;
 
409
                        }
 
410
                }
 
411
 
 
412
                // handle redundant tracks
 
413
                frames.clear();
 
414
                while (!atTrackDataListEnd) {
 
415
                        if ((*it).getFileDuration() == 0) {
 
416
                                it = m_trackDataVector.erase(it);
 
417
                        } else {
 
418
                                (*it).setFrameCollection(frames);
 
419
                                (*it).setImportDuration(0);
 
420
                                ++it;
 
421
                        }
 
422
                        atTrackDataListEnd = (it == m_trackDataVector.end());
 
423
                }
 
424
        } else if (!framesHdr.empty()) {
 
425
                // if there are no track data, fill frame header data
 
426
                for (ImportTrackDataVector::iterator it = m_trackDataVector.begin();
 
427
                                 it != m_trackDataVector.end();
 
428
                                 ++it) {
 
429
                        (*it).setFrameCollection(framesHdr);
 
430
                }
 
431
        }
 
432
}