# -*- Mode: python; coding: utf-8; tab-width: 8; indent-tabs-mode: t; -*-
#
# Copyright (C) 2008, 2009, 2010 Edgar Luna
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2, or (at your option)
# any later version.
#
# The Rhythmbox authors hereby grant permission for non-GPL compatible
# GStreamer plugins to be used and distributed together with GStreamer
# and Rhythmbox. This permission is above and beyond the permissions granted
# by the GPL license by which Rhythmbox is covered. If you modify this code
# you may extend this exception to your version of the code, but you are not
# obligated to do so. If you do not wish to do so, delete this exception
# statement from your version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.

class DarkLyricsParser (object):
	"""Parser for Lyrics from www.darklyrics.com

	A lookup is a chain of page requests, each one handing its result
	to the next method of the chain:

	    search -> search_artist -> search_song -> parse_lyrics

	Each method either starts the next request or reports the final
	result through ``callback (lyrics, *data)``, where ``lyrics`` is
	None whenever a page or a good enough match is missing.
	"""

	def __init__(self, artist, title):
		"""Remember which artist and song title to look for."""
		self.artist = artist
		self.title = title
		# Artist name reduced to lowercase ASCII letters; set by search()
		self.artist_ascii = ''
		# Anchor number of the song in its album page; set by search_song()
		self.titlenumber = ''

	def search(self, callback, *data):
		"""Do a request of a specific url based on artist's first letter name.

		callback is called as callback (lyrics, *data) once the lookup
		is over; data is forwarded to it untouched.
		"""
		# The name is lowercased before filtering, so the result is
		# already lowercase.
		self.artist_ascii = ''.join(c for c in self.artist.lower()
				if c in string.ascii_letters)
		if not self.artist_ascii:
			# No ASCII letter left to choose an index page with
			# (name made of digits or non-Latin characters only).
			callback (None, *data)
			return

		firstcharurl = 'http://www.darklyrics.com/%s.html' % (self.artist_ascii[0])
		loader = rb.Loader ()
		loader.get_url (firstcharurl, self.search_artist, callback, *data)

	def search_artist(self, artist_page, callback, *data):
		"""Search for the link to the page of artist in artist_page.

		artist_page is the content of the index page requested by
		search(), or None when there is no page to parse.
		"""
		if artist_page is None:
			callback (None, *data)
			return

		# The artist links come after this banner script.
		sections = re.split ('<SCRIPT LANGUAGE="javascript" src="tban2.js"></SCRIPT>',
				artist_page, 1)
		if len (sections) < 2:
			# Marker not found: nothing to parse in this page
			callback (None, *data)
			return

		pattern_link = '<a href="'
		pattern_artist = '([^"]*)">*([^<]*)</a><br><br>'
		links = re.split (pattern_link, sections[1].lower())
		# The first piece is the text that precedes the first link
		links.pop (0)

		best_match = ()
		best_value = min_artist_match
		for line in links:
			artist = re.findall (pattern_artist, line)
			if not artist:
				continue
			artist_link, artist_name = artist[0]
			if artist_link.startswith ('http:'):
				# Absolute link, it does not point to an artist page
				continue
			artist_name = artist_name.strip ()
			smvalue = rb.string_match (artist_name, self.artist_ascii)
			# Keep the highest score, not just the last acceptable one
			if smvalue > best_value:
				best_value = smvalue
				artist_url = 'http://www.darklyrics.com/%s' % (artist_link)
				best_match = (smvalue, artist_url, artist_name)

		if not best_match:
			# Lyrics are located in external site
			callback (None, *data)
			return

		loader = rb.Loader ()
		self.artist = best_match[2]
		loader.get_url (best_match[1], self.search_song, callback, *data)

	class SongFound (object):
		"""A song link found in an artist page, with its match score."""

		def __init__ (self, smvalue, title, number, album, artist):
			self.smvalue = smvalue
			self.title = title
			self.number = number
			self.album = album
			self.artist = artist

		def __str__(self):
			return '(' + str(self.smvalue) + '. ' + self.title + '. ' + self.album + '. ' + self.artist + ')'

	def search_song (self, songlist, callback, *data):
		"""If artist's page is found, search_song looks for the song.

		The artist page contains a list of all the albums and
		links to the songs lyrics from this.

		songlist is the content of the artist page requested by
		search_artist(), or None when there is no page to parse.
		"""
		if songlist is None:
			callback (None, *data)
			return

		# Search for all the <a>
		# filter for those that has the artist name string_match
		# and for those which its content is artist string_match
		# and get the best
		sections = re.split ('LYRICS<BR></FONT>', songlist)
		if len (sections) < 2:
			# Marker not found: nothing to parse in this page
			callback (None, *data)
			return

		link_section = sections[1].lower()
		pattern_song = '<a href="../lyrics/(.*)/(.*).html#([^"]+)" target="_blank"><FONT COLOR="#CCCCCC">(.*)</FONT></a><br>'
		matches = re.findall (pattern_song.lower(), link_section)

		# Titles are compared without spaces
		wanted = self.title.lower().replace(' ', '')
		best_match = None
		best_value = min_song_match
		for artist, album, number, title in matches:
			smvalue = rb.string_match (title.lower().replace(' ', ''), wanted)
			# Keep the highest score, not just the last acceptable one
			if smvalue > best_value:
				best_value = smvalue
				best_match = self.SongFound (smvalue, title, number, album, artist)

		if best_match is None:
			callback (None, *data)
			return

		loader = rb.Loader ()
		url = 'http://www.darklyrics.com/lyrics/%s/%s.html' % (best_match.artist, best_match.album)
		self.title = best_match.title
		self.titlenumber = best_match.number
		loader.get_url (url, self.parse_lyrics, callback, *data)

	def parse_lyrics (self, album, callback, *data):
		"""In the album's page parse_lyrics get the lyrics of the song.

		This page contains all the lyrics for the album, but
		this method gets rid of everything that isn't the
		lyrics of self.title.

		album is the content of the album page requested by
		search_song(), or None when there is no page to parse.
		"""
		if album is None:
			callback (None, *data)
			return

		# The group captures what follows the song heading up to the
		# next '<a' or '<f'. Note that '|' is a literal character
		# inside the brackets, so '<|' ends the capture as well.
		titleline = '(?mis)<a name=%s><FONT color=#DDDDDD><b>%s. %s</b></font>(.+?)<[a|f]' % \
			(self.titlenumber, self.titlenumber, re.escape(self.title.title()))
		lyricmatch = re.split (titleline, album)
		if len (lyricmatch) <= 1:
			# The song heading is not in this page
			callback (None, *data)
			return

		# With a group in the pattern, item 1 is the captured text
		lyrics = lyricmatch[1]
		lyrics = lyrics.replace ('\r', "")
		lyrics = re.sub (r'<.*?>', "", lyrics)
		lyrics = lyrics.strip ("\n")
		title = "%s - %s\n\n" % (self.artist.title(), self.title.title())

		lyrics = title + str (lyrics)
		lyrics += "\n\nLyrics provided by Dark Lyrics"
		callback (lyrics, *data)