1
# -*- coding: UTF-8 -*-
3
__revision__ = '$Id: PluginMovieIMDB-de.py 1433 2010-07-14 20:13:48Z mikej06 $'
5
# Copyright (c) 2007-2009 Michael Jahn
7
# This program is free software; you can redistribute it and/or modify
8
# it under the terms of the GNU General Public License as published by
9
# the Free Software Foundation; either version 2 of the License, or
10
# (at your option) any later version.
12
# This program is distributed in the hope that it will be useful,
13
# but WITHOUT ANY WARRANTY; without even the implied warranty of
14
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15
# GNU Library General Public License for more details.
17
# You should have received a copy of the GNU General Public License
18
# along with this program; if not, write to the Free Software
19
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
21
# You may use and distribute this software under the terms of the
22
# GNU General Public License, version 2 or later
27
# Module-level metadata describing this plugin.
plugin_name = 'IMDb-de'
plugin_description = 'Internet Movie Database German'
plugin_url = 'www.imdb.de'
# NOTE(review): `_` is not defined in the visible part of this file;
# presumably the translation function provided by the host application.
plugin_language = _('German')
plugin_author = 'Michael Jahn'
plugin_author_email = 'mikej06@hotmail.com'
plugin_version = '1.7'
35
class Plugin(movie.Movie):
# Movie-detail scraper for title pages on www.imdb.de.
# NOTE(review): this listing appears to have lost its indentation, and the
# bare integers between statements look like leftover source line numbers.
# Gaps in that numbering suggest lines are missing, so nesting and some
# statements cannot be confirmed from what is visible here.
36
def __init__(self, id):
# Constructor: records the page encoding and builds the German title URL.
# NOTE(review): self.movie_id is read below, but no visible line stores the
# `id` argument; the line numbered 38 is absent -- confirm.
37
self.encode = 'iso8859-1'
39
self.url = "http://www.imdb.de/title/tt%s" % str(self.movie_id)
42
# NOTE(review): the lines numbered 40-41 are absent; the statements below
# presumably belong to a separate method whose header is not visible.
# Fetch the full-credits, plot-summary and company-credits sub-pages.
self.cast_page = self.open_page(url=self.url + '/fullcredits')
43
self.plot_page = self.open_page(url=self.url + '/plotsummary')
44
self.comp_page = self.open_page(url=self.url + '/companycredits')
45
# looking for the original imdb page
46
self.imdb_page = self.open_page(url="http://www.imdb.com/title/tt%s" % str(self.movie_id))
47
self.imdb_plot_page = self.open_page(url="http://www.imdb.com/title/tt%s/plotsummary" % str(self.movie_id))
48
# correction of all &#xxx entities
49
# Every fetched page, including the main self.page, goes through the same
# entity conversion.
self.page = gutils.convert_entities(self.page)
50
self.cast_page = gutils.convert_entities(self.cast_page)
51
self.plot_page = gutils.convert_entities(self.plot_page)
52
self.comp_page = gutils.convert_entities(self.comp_page)
53
self.imdb_page = gutils.convert_entities(self.imdb_page)
54
self.imdb_plot_page = gutils.convert_entities(self.imdb_plot_page)
57
# NOTE(review): the lines numbered 55-56 are absent; the poster lookup below
# presumably sits in its own method whose header is not visible.
# Locate the 'a name="poster"' anchor in the main page (-1 when not found).
tmp = string.find(self.page, 'a name="poster"')
58
if tmp == -1: # poster not available
61
# NOTE(review): the lines numbered 59-60 are absent, so the statement run when
# no poster exists (and presumably an `else:`) is not visible.
# Presumably takes the text between 'src="' and the next '"' after the anchor.
self.image_url = gutils.trim(self.page[tmp:], 'src="', '"')
63
def get_o_title(self):
# Determine the original title (self.o_title).
# Starts from the page heading (between '<h1>' and the following '<span'),
# then inspects the "Alternativ" / "Auch bekannt als" section for an entry
# tagged '(Originaltitel)'.
# NOTE(review): the lines numbered 66, 70-71 and 73-74 are absent, so the
# conditions guarding the assignments below are not visible.
64
self.o_title = gutils.trim(self.page, '<h1>', '<span')
65
tmp = gutils.regextrim(self.page, '<h5>(Alternativ|Auch bekannt als):', '</div>')
67
# One candidate per '<br>'-separated entry of that section.
elements = string.split(tmp, '<br>')
68
for element in elements:
69
# Position of the '(Originaltitel)' tag within this entry, or -1.
index = string.find(element,'(Originaltitel)')
72
self.o_title = gutils.before(tmp, '- ')
75
# Remove double quotes from the result.
self.o_title = string.replace(self.o_title, '"', '')
79
# NOTE(review): the lines numbered 76-78 are absent; the statements below set
# self.title and presumably belong to a separate method whose header is
# not visible.
self.title = gutils.trim(self.page, '<h1>', '<span')
80
# Split the alternative-titles section at each '<i class="transl"' marker.
elements = string.split(gutils.regextrim(self.page, '<h5>(Alternativ|Auch bekannt als):', '</div>'), '<i class="transl"')
82
for element in elements:
83
# Candidate taken from the text between '>' and '[de]'; presumably cut at
# the first '(' by gutils.before.
# NOTE(review): the lines numbered 81 and 84-87 are absent, so how tmp is
# used afterwards is not visible.
tmp = gutils.before(gutils.trim(element, '>', '[de]'), '(')
88
def get_director(self):
    """Extract the director name(s) from the main page into self.director.

    Reads self.page and writes self.director.  The '<h5>Regie</h5>'
    section is tried first; when that yields an empty string the
    '<h5>Regisseur:</h5>' section is used instead.
    """
    director = gutils.trim(self.page, '<h5>Regie</h5>', '<br/>')
    if director == '':
        director = gutils.trim(self.page, '<h5>Regisseur:</h5>', '</div>')
    # Truncate at a "more" link label, if one is present (see __before_more).
    director = self.__before_more(director)
    # Remaining <br/> separators become ", " so several names read as a list.
    director = director.replace('<br/>', ', ')
    director = gutils.clean(director)
    # Drop a comma left dangling at the very end of the string.
    self.director = re.sub(r',$', '', director)
98
# NOTE(review): the lines numbered 96-97 are absent; the statements below build
# self.plot and presumably belong to a method whose header is not visible.
# Short description from the main page, cut at a "more" link label.
self.plot = gutils.trim(self.page, '<h5>Kurzbeschreibung:</h5>', '</div>')
99
self.plot = self.__before_more(self.plot)
100
# Paragraphs of the German plot-summary page, split at '<p class="plotpar">'.
elements = string.split(self.plot_page, '<p class="plotpar">')
101
if len(elements) > 1:
102
self.plot = self.plot + '\n\n'
104
# NOTE(review): the lines numbered 103 and 105 are absent, so the exact loop
# bounds and any per-element guard are not visible.
for element in elements:
106
# Append the tag-stripped paragraph text that precedes '</a>'.
self.plot = self.plot + gutils.strip_tags(gutils.before(element, '</a>')) + '\n'
108
# nothing in german found, try original
# NOTE(review): the line numbered 107 is absent; presumably the condition that
# selects this fallback.
109
self.plot = gutils.regextrim(self.imdb_page, '<h5>Plot:</h5>', '(</div>|<a href.*)')
110
self.plot = self.__before_more(self.plot)
111
elements = string.split(self.imdb_plot_page, '<p class="plotpar">')
112
if len(elements) > 1:
113
self.plot = self.plot + '\n\n'
115
for element in elements:
117
# Same as above, but paragraphs are separated by a blank line here.
self.plot = self.plot + gutils.strip_tags(gutils.before(element, '</a>')) + '\n\n'
120
# NOTE(review): the lines numbered 118-119 are absent; the statements below set
# self.year and presumably belong to a separate method.
# Take the page heading, then the text between '(' and ')' inside it.
self.year = gutils.trim(self.page, '<h1>', ' <span class')
121
self.year = gutils.trim(self.year, '(', ')')
123
def get_runtime(self):
    """Extract the running time from the main page into self.runtime.

    Reads self.page and writes self.runtime.  The start pattern accepts
    any characters other than 'n' between 'L' and 'nge:', presumably so
    that the heading matches however its umlaut is encoded; the end
    pattern is the ' min' / ' Min' unit.
    """
    self.runtime = gutils.regextrim(
        self.page, '<h5>L[^n]+nge:</h5>', ' [Mm]in')
127
# NOTE(review): the lines numbered 125-126 are absent; the statements below set
# self.genre and presumably belong to a method whose header is not visible.
# Genre section of the main page, cut at a "more" link label.
self.genre = gutils.trim(self.page, '<h5>Genre:</h5>', '</div>')
128
self.genre = self.__before_more(self.genre)
132
# NOTE(review): the lines numbered 129-131 and 133 are absent; the cast
# statements below presumably sit in their own method, and the condition that
# chooses between the two cast sources is not visible.
# Cast table from the full-credits page ...
self.cast = gutils.trim(self.cast_page, '<table class="cast">', '</table>')
134
# ... and cast table from the main page.
self.cast = gutils.trim(self.page, '<table class="cast">', '</table>')
135
# Replace the '...' separator (with and without surrounding spaces) by the
# translated ' as ' text, encoded as UTF-8.
self.cast = string.replace(self.cast, ' ... ', _(' as ').encode('utf8'))
136
self.cast = string.replace(self.cast, '...', _(' as ').encode('utf8'))
137
# Turn table-row boundaries (plain, "even" and "odd" rows) into newlines.
self.cast = string.replace(self.cast, '</tr><tr>', "\n")
138
self.cast = re.sub('</tr>[ \t]*<tr[ \t]*class="even">', "\n", self.cast)
139
self.cast = re.sub('</tr>[ \t]*<tr[ \t]*class="odd">', "\n", self.cast)
140
self.cast = self.__before_more(self.cast)
141
# Collapse runs of spaces into a single space.
self.cast = re.sub('[ ]+', ' ', self.cast)
143
def get_classification(self):
    """Extract the German age rating into self.classification.

    Reads self.page and writes self.classification.  The
    'Altersfreigabe:' section is isolated first; within it, the entry
    between 'Deutschland:' and the next '|' separator is taken.
    """
    certificates = gutils.trim(self.page, 'Altersfreigabe:', '</div>')
    self.classification = gutils.trim(certificates, 'Deutschland:', '|')
146
def get_studio(self):
# Determine the production companies (self.studio).
# NOTE(review): the line numbered 149 is absent; presumably the condition under
# which the main-page lookup below replaces the company-credits result.
147
# Production-company list from the company-credits page, items joined by ', '.
self.studio = gutils.trim(self.comp_page, '<h2>Produktionsfirmen</h2>', '</ul>')
148
self.studio = string.replace(self.studio, '</li><li>', ', ')
150
# Company section of the main page, cut at a "more" link label.
self.studio = gutils.trim(self.page, '<h5>Firma:</h5>', '</div>')
151
self.studio = self.__before_more(self.studio)
153
def get_o_site(self):
# NOTE(review): the lines numbered 154-156 are absent, so the body of
# get_o_site is not visible; the assignment below sets self.site and
# presumably belongs to a separate method whose header is missing.
157
self.site = "http://www.imdb.de/title/tt%s" % self.movie_id
159
def get_trailer(self):
    """Build the trailers URL on imdb.com for this title.

    Reads self.movie_id and writes self.trailer.  The id is passed
    through str() before formatting, matching how the other title URLs
    in this file are built.
    """
    self.trailer = "http://www.imdb.com/title/tt%s/trailers" % str(self.movie_id)
162
def get_country(self):
    """Extract the production countries from the main page into self.country.

    Reads self.page and writes self.country.
    """
    country = gutils.trim(self.page, '<h5>Land:</h5>', '</div>')
    # Truncate at a "more" link label, if one is present (see __before_more).
    country = self.__before_more(country)
    # Remove line breaks, then collapse runs of spaces into a single space.
    country = re.sub('\n+', '', country)
    self.country = re.sub(' +', ' ', country)
168
def get_rating(self):
# Determine the user rating (self.rating).
# NOTE(review): the lines numbered 170-171 and 175 onward are absent, so any
# guard or error handling around the parsing below is not visible.
169
# Section between the '<h5>Nutzer-Bewertung:</h5>' heading and '/10'.
self.rating = gutils.trim(self.page, '<h5>Nutzer-Bewertung:</h5>', '/10')
172
# Collect the numeric tokens (digits, '.' and ',') from the cleaned text.
tmp = re.findall('[0-9.,]+', gutils.clean(self.rating))
173
if tmp and len(tmp) > 0:
174
# Accept a decimal comma, then round the first token to a whole number.
self.rating = round(float(tmp[0].replace(',', '.')))
182
# NOTE(review): the lines numbered 175-181 are absent; the statements below
# build self.notes and presumably belong to a method whose header is not
# visible.
# Language: section text with tags stripped, newlines removed, spaces
# collapsed and trailing whitespace trimmed.
language = gutils.trim(self.page, '<h5>Sprache:</h5>', '</div>')
183
language = gutils.strip_tags(language)
184
language = re.sub('[\n]+', '', language)
185
language = re.sub('[ ]+', ' ', language)
186
language = language.rstrip()
187
# Color: same clean-up as above (trailing spaces are removed twice).
color = gutils.trim(self.page, '<h5>Farbe:</h5>', '</div>')
188
color = gutils.strip_tags(color)
189
color = re.sub('[\n]+', '', color)
190
color = re.sub('[ ]+', ' ', color)
191
color = re.sub('[ ]+$', '', color)
192
color = color.rstrip()
193
# Sound: same clean-up, then the ' | '-separated entries are re-joined.
sound = gutils.trim(self.page, '<h5>Tonverfahren:</h5>', '</div>')
194
sound = gutils.strip_tags(sound)
195
sound = re.sub('[\n]+', '', sound)
196
sound = re.sub('[ ]+', ' ', sound)
197
sound = sound.rstrip()
198
soundsplit = sound.split(' | ')
199
if len(soundsplit) > 1:
202
# NOTE(review): the lines numbered 200-201 are absent; since the loop appends
# to `sound`, it was presumably reset (and the entries possibly reordered)
# there -- confirm.
for elem in soundsplit:
203
sound += elem + ' | '
204
# Drop the trailing ' | ' (three characters) left by the loop.
sound = sound[0:len(sound) - 3]
205
# Tagline: cut at a "more" link label, then the same clean-up as above.
tagline = gutils.trim(self.page, '<h5>Werbezeile:</h5>', '</div>')
206
tagline = self.__before_more(tagline)
207
tagline = gutils.strip_tags(tagline)
208
tagline = re.sub('[\n]+', '', tagline)
209
tagline = re.sub('[ ]+', ' ', tagline)
210
tagline = tagline.rstrip()
212
# Assemble self.notes as one "label: value" line per field.  All labels except
# 'Tagline' are translated via _() and encoded as UTF-8.
# NOTE(review): the lines numbered 211, 213, 215 and 217 are absent; presumably
# conditions deciding whether each line is added.
self.notes = "%s: %s\n" %(_('Language').encode('utf8'), language)
214
# The audio label is translated with its <b> markup and the tags stripped after.
self.notes += "%s: %s\n" %(gutils.strip_tags(_('<b>Audio</b>').encode('utf8')), sound)
216
self.notes += "%s: %s\n" %(_('Color').encode('utf8'), color)
218
self.notes += "%s: %s\n" %('Tagline', tagline)
220
def get_screenplay(self):
# Collect the writers from the credits page into self.screenplay as a
# comma-separated string.
# NOTE(review): the lines numbered 221 and 223 are absent; self.screenplay is
# appended to below, so it is presumably initialised in one of them.
222
# Split the '>Buch<' table of the credits page at each link.
parts = re.split('<a href=', gutils.trim(self.cast_page, '>Buch<', '</table>'))
224
# parts[0] is the text before the first link, so it is skipped.
for part in parts[1:]:
225
# Link text of this entry.
screenplay = gutils.trim(part, '>', '<')
226
# NOTE(review): the line numbered 227 (the body of this test) is absent.
if screenplay == 'WGA':
228
# Remove credit annotations from the name.
screenplay = screenplay.replace(' (geschrieben von)', '')
229
screenplay = screenplay.replace(' (original scenario)', '')
230
screenplay = screenplay.replace(' und<', '<')
231
self.screenplay = self.screenplay + screenplay + ', '
232
# Drop the trailing ', ' separator left by the loop.
if len(self.screenplay) > 2:
233
self.screenplay = self.screenplay[0:len(self.screenplay) - 2]
235
def get_cameraman(self):
    """Extract the cinematography credits into self.cameraman.

    Reads self.cast_page and writes self.cameraman.  The result is the
    '>Kamera<' table of the credits page with the '(Kamera)' and
    '(nicht im Abspann)' annotations removed.
    """
    # NOTE(review): the leading '<' presumably restores the bracket that the
    # '>Kamera<' start marker consumed -- confirm.
    cameraman = '<' + gutils.trim(self.cast_page, '>Kamera<', '</table>')
    # str.replace is used instead of the deprecated string.replace function,
    # in line with get_director and get_screenplay.
    cameraman = cameraman.replace('(Kamera)', '')
    self.cameraman = cameraman.replace('(nicht im Abspann)', '')
240
def __before_more(self, data):
# Cut `data` at a "more"-style link label (German and English variants).
# NOTE(review): the lines numbered 243 and 245-246 are absent, so the condition
# guarding the truncation and the return statement are not visible.
241
for element in ['>Mehr ansehen<', '>mehr<', '>Full summary<', '>Full synopsis<']:
242
# Position of the label in data, or -1 when it does not occur.
tmp = string.find(data, element)
244
# Keep the text before the label and re-append '>' (every label starts
# with '>').
data = data[:tmp] + '>'
247
class SearchPlugin(movie.SearchMovie):
# Search-result scraper for www.imdb.de.
# NOTE(review): this listing appears to have lost its indentation, and the
# bare integers between statements look like leftover source line numbers.
# Gaps in that numbering suggest lines are missing, so nesting and some
# statements cannot be confirmed from what is visible here.
248
# Matches a link to /title/tt<digits>/ and captures the digits plus the rest
# of the table row up to '</tr>'.
PATTERN = re.compile(r"""<a href=['"]/title/tt([0-9]+)/["'](.*?)</tr>""", re.IGNORECASE)
249
# Matches the "Here are the N matching titles" banner text.
PATTERN_POWERSEARCH = re.compile(r"""Here are the [0-9]+ matching titles""")
252
# NOTE(review): the lines numbered 250-251 and 254 are absent; the assignments
# below presumably sit in the constructor, whose header is not visible.
# Both search URLs point at the same imdb.de title search.
self.original_url_search = 'http://www.imdb.de/find?more=tt&q='
253
self.translated_url_search = 'http://www.imdb.de/find?more=tt&q='
255
self.remove_accents = False
257
def search(self,parent_window):
# Runs the search and prepares self.page for get_searches.
# NOTE(review): the lines numbered 259, 261, 263-265 and 270-271 are absent, so
# the outcome of the tests below and the return value are not visible.
258
if not self.open_search(parent_window):
260
tmp = gutils.trim(self.page, ' angezeigt)', ' Treffergenauigkeit')
262
if self.PATTERN_POWERSEARCH.search(self.page) is None:
266
# correction of all &#xxx entities
267
# The page is decoded from iso8859-1 before the entity conversion.
self.page = self.page.decode('iso8859-1')
268
self.page = gutils.convert_entities(self.page)
269
#self.page = self.page.encode(self.encode)
272
def get_searches(self):
# Fills self.ids and self.titles from the result rows in self.page.
273
# Split into table rows; the upper-case '<TR>' marker is tried when the
# lower-case one produces no split.
elements = string.split(self.page, '<tr>')
274
if len(elements) < 2:
275
elements = string.split(self.page, '<TR>')
278
# elements[0] precedes the first row marker and is skipped.
for element in elements[1:]:
279
match = self.PATTERN.findall(element)
281
# NOTE(review): the line numbered 280 is absent; `entry` presumably iterates
# over `match`.
# Title: cleaned link text with a leading "<number>." prefix removed.
tmp = re.sub('^[0-9]+[.]', '', gutils.clean(gutils.after(entry[1], '>')))
282
self.ids.append(entry[0])
283
self.titles.append(tmp)
288
class SearchPluginTest(SearchPlugin):
# Test fixture for the search plugin.
# NOTE(review): the lines numbered 289, 292 and 296 are absent; the closing
# brace of the dict below is not visible in this listing.
290
# Configuration for automated tests:
291
# dict { search title -> [ expected result count for original url, expected result count for translated url ] }
293
test_configuration = {
294
'Rocky Balboa' : [ 21, 21 ],
295
'Ein glückliches Jahr' : [ 6, 6 ]
300
# Configuration for automated tests:
301
# dict { movie_id -> dict { attribute -> value } }
303
# value: * True/False if attribute only should be tested for any value
304
# * or the expected value
306
test_configuration = {
308
'title' : 'Rocky Balboa',
309
'o_title' : 'Rocky Balboa',
310
'director' : 'Sylvester Stallone',
312
'cast' : 'Sylvester Stallone' + _(' as ') + 'Rocky Balboa\n\
313
Burt Young' + _(' as ') + 'Paulie\n\
314
Antonio Tarver' + _(' as ') + 'Mason \'The Line\' Dixon\n\
315
Geraldine Hughes' + _(' as ') + 'Marie\n\
316
Milo Ventimiglia' + _(' as ') + 'Robert Balboa Jr.\n\
317
Tony Burton' + _(' as ') + 'Duke\n\
318
A.J. Benza' + _(' as ') + 'L.C.\n\
319
James Francis Kelly III' + _(' as ') + 'Steps\n\
320
Talia Shire' + _(' as ') + 'Adrian (Archivmaterial)\n\
321
Lou DiBella' + _(' as ') + 'als er selbst\n\
322
Mike Tyson' + _(' as ') + 'als er selbst\n\
323
Henry G. Sanders' + _(' as ') + 'Martin\n\
324
Pedro Lovell' + _(' as ') + 'Spider Rico\n\
325
Ana Gerena' + _(' as ') + 'Isabel\n\
326
Angela Boyd' + _(' as ') + 'Angie\n\
327
Louis Giansante' + _(' as ') + 'Bar Thug\n\
328
Maureen Schilling' + _(' as ') + 'Lucky\'s Bartender\n\
329
Lahmard J. Tate' + _(' as ') + 'X-Cell (als Lahmard Tate)\n\
330
Woody Paige' + _(' as ') + 'ESPN Commentator (als Woodrow W. Paige)\n\
331
Skip Bayless' + _(' as ') + 'ESPN Commentator\n\
332
Jay Crawford' + _(' as ') + 'ESPN Commentator\n\
333
Brian Kenny' + _(' as ') + 'ESPN Host\n\
334
Dana Jacobson' + _(' as ') + 'ESPN Host\n\
335
Charles Johnson' + _(' as ') + 'ESPN Host (als Chuck Johnson)\n\
336
James Binns' + _(' as ') + 'Commissioner (als James J. Binns)\n\
337
Johnnie Hobbs Jr.' + _(' as ') + 'Commissioner\n\
338
Barney Fitzpatrick' + _(' as ') + 'Commissioner\n\
339
Jim Lampley' + _(' as ') + 'HBO Commentator\n\
340
Larry Merchant' + _(' as ') + 'HBO Commentator\n\
341
Max Kellerman' + _(' as ') + 'HBO Commentator\n\
342
LeRoy Neiman' + _(' as ') + 'als er selbst\n\
343
Bert Randolph Sugar' + _(' as ') + 'Ring Magazine Reporter\n\
344
Bernard Fernández' + _(' as ') + 'Boxing Association of America Writer (als Bernard Fernandez)\n\
345
Gunnar Peterson' + _(' as ') + 'Weightlifting Trainer\n\
346
Yahya' + _(' as ') + 'Dixon\'s Opponent\n\
347
Marc Ratner' + _(' as ') + 'Weigh-In Official\n\
348
Anthony Lato Jr.' + _(' as ') + 'Rocky\'s Inspector\n\
349
Jack Lazzarado' + _(' as ') + 'Dixon\'s Inspector\n\
350
Michael Buffer' + _(' as ') + 'Ring Announcer\n\
351
Joe Cortez' + _(' as ') + 'Schiedsrichter\n\
352
Carter Mitchell' + _(' as ') + 'Shamrock Foreman\n\
353
Vinod Kumar' + _(' as ') + 'Ravi\n\
354
Fran Pultro' + _(' as ') + 'Father at Restaurant\n\
355
Frank Stallone' + _(' as ') + 'Dinner Patron (als Frank Stallone Jr.)\n\
356
Jody Giambelluca' + _(' as ') + 'Dinner Patron\n\
357
Tobias Segal' + _(' as ') + 'Robert\'s Friend\n\
358
Tim Carr' + _(' as ') + 'Robert\'s Friend\n\
359
Matt Frack' + _(' as ') + 'Robert\'s Friend #3\n\
360
Paul Dion Monte' + _(' as ') + 'Robert\'s Friend\n\
361
Kevin King Templeton' + _(' as ') + 'Robert\'s Friend (als Kevin King-Templeton)\n\
362
Robert Michael Kelly' + _(' as ') + 'Mr. Tomilson\n\
363
Rick Buchborn' + _(' as ') + 'Rocky Fan\n\
364
Nick Baker' + _(' as ') + 'Irish Pub Bartender\n\
365
Don Sherman' + _(' as ') + 'Andy\n\
366
Stu Nahan' + _(' as ') + 'Computer Fight Commentator (Sprechrolle)\n\
367
Gary Compton' + _(' as ') + 'Sicherheitsbediensteter\n\
368
Tony Devon' + _(' as ') + 'Neighbor übrige Besetzung in alphabetischer Reihenfolge:\n\
369
Vale Anoai' + _(' as ') + 'Shopper in Italian Market\n\
370
Michael Ahl' + _(' as ') + 'Restaurant Patron (nicht im Abspann)\n\
371
Andrew Aninsman' + _(' as ') + 'Promoter (nicht im Abspann)\n\
372
Ben Bachelder' + _(' as ') + 'The Arm (nicht im Abspann)\n\
373
Lacy Bevis' + _(' as ') + 'Boxing Spectator (nicht im Abspann)\n\
374
Tim Brooks' + _(' as ') + 'Boxing Spectator (nicht im Abspann)\n\
375
D.T. Carney' + _(' as ') + 'High Roller (nicht im Abspann)\n\
376
Ricky Cavazos' + _(' as ') + 'Boxing Spectator (nicht im Abspann)\n\
377
Rennie Cowan' + _(' as ') + 'Boxing Spectator (nicht im Abspann)\n\
378
Peter Defeo' + _(' as ') + 'Vendor (nicht im Abspann)\n\
379
Deon Derrico' + _(' as ') + 'High Roller at Limo (nicht im Abspann)\n\
380
Jacob \'Stitch\' Duran' + _(' as ') + 'Dixon\'s Trainer (nicht im Abspann)\n\
381
Ruben Fischman' + _(' as ') + 'High-Roller in Las Vegas (nicht im Abspann)\n\
382
David Gere' + _(' as ') + 'Patron at Adrian\'s (nicht im Abspann)\n\
383
Noah Jacobs' + _(' as ') + 'Boxing Fan (nicht im Abspann)\n\
384
Mark J. Kilbane' + _(' as ') + 'Businessman (nicht im Abspann)\n\
385
Zach Klinefelter' + _(' as ') + 'Boxing Spectator (nicht im Abspann)\n\
386
David Kneeream' + _(' as ') + 'Adrian\'s Patron (nicht im Abspann)\n\
387
Dolph Lundgren' + _(' as ') + 'Captain Ivan Drago (Archivmaterial) (nicht im Abspann)\n\
388
Dean Mauro' + _(' as ') + 'Sports Journalist (nicht im Abspann) (unbestätigt)\n\
389
Burgess Meredith' + _(' as ') + 'Mickey Goldmill (Archivmaterial) (nicht im Abspann)\n\
390
Dan Montero' + _(' as ') + 'Boxing Spectator (nicht im Abspann)\n\
391
Keith Moyer' + _(' as ') + 'Bargast (nicht im Abspann)\n\
392
Mr. T' + _(' as ') + 'Clubber Lang (Archivmaterial) (nicht im Abspann)\n\
393
Jacqueline Olivia' + _(' as ') + 'Mädchen (nicht im Abspann)\n\
394
Brian H. Scott' + _(' as ') + 'Ringside Cop #1 (nicht im Abspann)\n\
395
Jackie Sereni' + _(' as ') + 'Girl on Steps (nicht im Abspann)\n\
396
Keyon Smith' + _(' as ') + 'Boxing Spectator (nicht im Abspann)\n\
397
Frank Traynor' + _(' as ') + 'Rechtsanwalt (nicht im Abspann)\n\
398
Kimberly Villanova' + _(' as ') + 'Businesswoman (nicht im Abspann)',
400
'genre' : 'Drama | Sport',
401
'classification' : False,
402
'studio' : 'Metro-Goldwyn-Mayer (MGM) (presents) (copyright owner), Columbia Pictures (presents) (copyright owner), Revolution Studios (presents) (copyright owner), Chartoff-Winkler Productions, Rogue Marble',
404
'site' : 'http://www.imdb.de/title/tt0479143',
405
'trailer' : 'http://www.imdb.com/title/tt0479143/trailers',
407
'notes' : _('Language') + ': Englisch | Spanisch\n'\
408
+ _('Audio') + ': DTS | Dolby Digital | SDDS\n'\
409
+ _('Color') + ': Farbe',
413
'screenplay' : 'Sylvester Stallone, Sylvester Stallone',
414
'cameraman' : 'Clark Mathis'
417
'title' : 'Ein glückliches Jahr',
418
'o_title' : 'La bonne année',
419
'director' : 'Claude Lelouch',
421
'cast' : 'Lino Ventura' + _(' as ') + 'Simon\n\
422
Françoise Fabian' + _(' as ') + 'Françoise\n\
423
Charles Gérard' + _(' as ') + 'Charlot\n\
424
André Falcon' + _(' as ') + 'Le bijoutier\n\
425
Mireille Mathieu' + _(' as ') + 'als sie selbst / Elle-même\n\
426
Lilo' + _(' as ') + 'Madame Félix\n\
427
Claude Mann' + _(' as ') + 'L\'intellectuel\n\
428
Frédéric de Pasquale' + _(' as ') + 'L\'amant parisien\n\
429
Gérard Sire' + _(' as ') + 'Le directeur de la prison\n\
430
Silvano Tranquilli' + _(' as ') + 'L\'amant italien\n\
433
Norman de la Chesnaye\n\
436
Michou' + _(' as ') + 'als er selbst\n\
437
Bettina Rheims' + _(' as ') + 'La jeune vendeuse\n\
441
Harry Walter übrige Besetzung in alphabetischer Reihenfolge:\n\
442
Anouk Aimée' + _(' as ') + 'Une femme (Archivmaterial) (nicht im Abspann)\n\
443
Elie Chouraqui' + _(' as ') + ' (nicht im Abspann)\n\
444
Rémy Julienne' + _(' as ') + 'Chauffeur de taxi (nicht im Abspann)\n\
445
Jean-Louis Trintignant' + _(' as ') + 'Un homme (Archivmaterial) (nicht im Abspann)',
446
'country' : 'Frankreich | Italien',
448
'classification' : '12 (f)',
449
'studio' : 'Les Films 13, Rizzoli Film',
451
'site' : 'http://www.imdb.de/title/tt0069815',
452
'trailer' : 'http://www.imdb.com/title/tt0069815/trailers',
454
'notes' : _('Language') + ': Französisch\n'\
455
+ _('Audio') + ': Mono\n'\
456
+ _('Color') + ': Farbe (Eastmancolor)',
460
'screenplay' : 'Claude Lelouch, Pierre Uytterhoeven, Claude Lelouch',
461
'cameraman' : 'Claude Lelouch'