~cosmin.lupu/+junk/penguintv

« back to all changes in this revision

Viewing changes to penguintv/itunes.py

  • Committer: cosmin.lupu at gmail
  • Date: 2010-04-27 16:47:43 UTC
  • Revision ID: cosmin.lupu@gmail.com-20100427164743-ds8xrqonipp5ovdf
initial packaging

Show diffs side-by-side

added added

removed removed

Lines of Context:
 
1
# itunes.py
 
2
# Written by Owen Williams, (c) 2007
 
3
# see LICENSE for license information
 
4
#
 
5
# iTunes has very strange weblinks, but they are not that hard to read.
 
6
# A "viewPodcast" link returns a gzipped web page that contains a link that
 
7
# iTunes can load.  Although the protocol of this link is itms://, we can
 
8
# load it with http.  This time we get a gzipped xml file, and toward the
 
9
# bottom of the file is a simple key / value pair for feedURL.  This
 
10
# url is what the podcast author has told itunes to use, and it'll be regular
 
11
# RSS (we hope).
 
12
 
 
13
 
 
14
import sys
 
15
import gzip
 
16
import urllib
 
17
import HTMLParser
 
18
import logging
 
19
 
 
20
from xml.sax import saxutils, make_parser
 
21
from xml.sax.handler import feature_namespaces
 
22
 
 
23
def is_itms_url(url):
    """Return True if *url* uses the iTunes ``itms://`` scheme, else False.

    The scheme is compared case-insensitively.  A real boolean is always
    returned (never None), matching is_itunes_url().
    """
    return url.lower().startswith("itms://")
 
26
 
 
27
def is_itunes_url(url):
    """Tell whether *url* looks like a link into the iTunes podcast store.

    Three shapes are accepted:

    * anything using the itms:// scheme (matched case-insensitively);
    * an address containing "apple.com/" (any case) together with the
      exact, case-sensitive text "viewPodcast", e.g.
      http://phobos.apple.com/WebObjects/MZStore.woa/wa/viewPodcast?id=207870198
    * an address containing "itunes.com/podcast" (any case), e.g.
      http://www.itunes.com/podcast?id=207870198

    Returns True for any of the above and False otherwise.
    """
    lowered = url.lower()
    if lowered.startswith("itms://"):
        return True
    # Note the asymmetry: the host is matched on the lowered text, the
    # "viewPodcast" marker on the URL exactly as given.
    is_store_page = "apple.com/" in lowered and "viewPodcast" in url
    return is_store_page or "itunes.com/podcast" in lowered
 
42
 
 
43
def get_rss_from_itunes(url):
    """Resolve an iTunes podcast link to the feed URL it refers to.

    An itms:// link is fetched directly over http.  Any other iTunes link
    is first passed to get_itms_url() to obtain the store link, and that
    result is handed to get_podcast_url().

    Raises ItunesError if *url* is not accepted by is_itunes_url(); errors
    raised by the two helper lookups propagate unchanged.
    """
    if not is_itunes_url(url):
        raise ItunesError("not an itunes url")

    if is_itms_url(url):
        # is_itms_url() matches the scheme case-insensitively, so swap the
        # prefix by position.  A case-sensitive str.replace() would leave
        # "ITMS://..." untouched and would also rewrite any later
        # "itms://" embedded in the URL.
        page_url = "http://" + url[len("itms://"):]
    else:
        page_url = get_itms_url(url)
    return get_podcast_url(page_url)
 
53
                
 
54
def get_itms_url(url):
    """Download the iTunes "webpage" at *url* and return the link found in it.

    The page is run through viewPodcastParser, and the parser's ``url``
    attribute is returned.

    Raises ItunesError if the parser did not find a link.
    """
    # Part 1, get the itunes "webpage" for this feed.
    # We have to save the file because urlopen doesn't support seeking.
    filename, _headers = urllib.urlretrieve(url)

    # The download is read as-is; no gzip decoding is applied here.
    page = open(filename, 'r')
    try:
        markup = page.read()
    finally:
        # Always release the file handle, even if the read fails.
        page.close()

    parser = viewPodcastParser()
    parser.feed(markup)

    if parser.url is None:
        raise ItunesError("error getting viewpodcast url from itunes")
    return parser.url
 
67
 
 
68
def get_podcast_url(url):
    """Download the iTunes XML document at *url* and return its feed URL.

    The document is parsed with a non-namespace SAX parser driving an
    itunesHandler, and the handler's ``url`` attribute is returned.

    Raises ItunesError if the handler ends up with no URL.
    """
    # Part 2, find the actual rss link in the itunes "webpage".
    filename, _headers = urllib.urlretrieve(url)

    parser = make_parser()
    parser.setFeature(feature_namespaces, 0)
    handler = itunesHandler()
    parser.setContentHandler(handler)

    # The download is parsed as-is; no gzip decoding is applied here.
    document = open(filename, 'r')
    try:
        parser.parse(document)
    finally:
        # Always release the file handle, even if parsing fails.
        document.close()

    # The handler starts with url == "", so test for emptiness rather
    # than for None; otherwise a missing URL would be returned as "".
    if not handler.url:
        raise ItunesError("error finding podcast url")

    return handler.url
 
84
 
 
85
class viewPodcastParser(HTMLParser.HTMLParser):
    """HTML parser that extracts the itms:// link from a page's <body> tag.

    After feed(), ``url`` holds the link found in the body's ``onload``
    attribute with its "itms" scheme replaced by "http", or None if no
    such link was seen.
    """

    def __init__(self):
        HTMLParser.HTMLParser.__init__(self)
        self.url = None

    def handle_starttag(self, tag, attrs):
        """Record the itms:// link from a <body onload="..."> attribute.

        The link runs from "itms://" up to the next single quote, or to
        the end of the attribute value if there is no quote.  Attributes
        without a value or without an itms:// link are ignored.
        """
        if tag.upper() != "BODY":
            return
        for attr, val in attrs:
            # A valueless attribute is reported with val None.
            if attr != "onload" or not val:
                continue
            start = val.find("itms://")
            if start == -1:
                # No link here; do not build a URL from unrelated text.
                continue
            # Drop the "itms" scheme name but keep "://..." for re-prefixing.
            link = val[start + len("itms"):]
            end = link.find("'")
            if end != -1:
                link = link[:end]
            self.url = "http" + link
 
99
 
 
100
# Choose the SAX handler base class: prefer xml.sax.handler.ContentHandler
# and fall back to xml.sax.saxutils.DefaultHandler when it cannot be imported.
try:
    from xml.sax.handler import ContentHandler
    def_handler = ContentHandler
except ImportError:
    try:
        from xml.sax.saxutils import DefaultHandler
        def_handler = DefaultHandler
    except Exception:
        logging.error("couldn't get xml parsing")
        # Bare raise re-raises the active exception with its traceback intact.
        raise
 
110
                
 
111
class itunesHandler(def_handler):
    """SAX handler that captures the <string> value paired with a 'feedURL' <key>.

    ``url`` is "" until such a pair is seen.  If several are present, the
    last one wins.
    """

    def __init__(self):
        # Let the base handler set up its own state before adding ours.
        def_handler.__init__(self)
        self.url = ""
        # Text collected for the <key> element currently open, else None.
        self._in_key = None
        # Text collected for the <string> element currently open, else None.
        self._in_value = None
        # Text of the most recently closed <key> element.
        self._last_key = None

    def startElement(self, name, attrs):
        """Begin collecting text when a <key> or <string> element opens."""
        if name == 'key':
            self._in_key = ""
        elif name == 'string':
            self._in_value = ""

    def endElement(self, name):
        """Finish a <key> or <string>; store the string if its key was 'feedURL'."""
        if name == 'key':
            self._last_key = self._in_key
            self._in_key = None
        elif name == 'string':
            if self._last_key == 'feedURL':
                self.url = self._in_value
            self._in_value = None

    def characters(self, ch):
        """Append character data to whichever element is being collected."""
        # The parser may deliver one element's text in several chunks.
        if self._in_key is not None:
            self._in_key += ch
        elif self._in_value is not None:
            self._in_value += ch
 
138
                        
 
139
class ItunesError(Exception):
    """Error carrying a message ``m`` describing a failed iTunes lookup."""

    def __init__(self, m):
        # Initialise the base class so ``args`` carries the message too.
        Exception.__init__(self, m)
        self.m = m

    def __str__(self):
        # Read the message from the instance; a bare ``m`` is undefined here.
        return str(self.m)
 
144
 
 
145
 
 
146