1
"""HTTP data browser"""
4
import os
import posixpath
import urllib
from sgmllib import SGMLParser

from helioviewer.hvpull.browser.basebrowser import BaseDataBrowser
7
class HTTPDataBrowser(BaseDataBrowser):
    """Data browser that lists remote content by reading the links of an
    HTTP page at a given location."""

    def __init__(self, server):
        """Create a browser for ``server``.

        ``server`` is passed straight to ``BaseDataBrowser.__init__``. This
        class later reads ``self.server``, so the base class presumably stores
        it under that name -- confirm against ``BaseDataBrowser``.
        """
        BaseDataBrowser.__init__(self, server)

    def get_directories(self, start_date, end_date):
        """Generates a list of remote directories which may be queried
        for files corresponding to the requested range. Note that these
        directories do not necessarily exist on the remote server.

        The list is computed by ``self.server.compute_directories`` rather
        than discovered by querying the remote host.
        """
        return self.server.compute_directories(start_date, end_date)

    def get_files(self, location, extension):
        """Get all the files that end with specified extension at the uri

        ``extension`` is given without the leading dot. Returns a list of
        the URLs produced by ``_query(location)`` that end with
        ``"." + extension``.
        """
        suffix = "." + extension
        return [url for url in self._query(location) if url.endswith(suffix)]

    def _query(self, location):
        """Get a list of files and folders at the specified remote location

        Every link found at ``location`` is joined onto ``location`` and
        returned. Links that are empty or that start with "/" or "?" are
        skipped (presumably the parent-directory and column-sort links of an
        auto-generated index page -- TODO confirm).
        """
        # query the remote location for the list of files and subdirectories
        url_lister = URLLister()
        hrefs = url_lister.read(location)

        # startswith() is used instead of href[0] so that an empty href
        # cannot raise IndexError; posixpath keeps "/" as the separator
        # whatever operating system this runs on.
        return [posixpath.join(location, href) for href in hrefs
                if href and not href.startswith(("/", "?"))]
39
class URLLister(SGMLParser):
    """Parser that collects the ``href`` value of every ``<a>`` tag it sees.

    Created on Nov 1, 2011
    @author: Jack Ireland <jack.ireland@nasa.gov>
    copied from the original version of the download code.
    """

    def __init__(self):
        """Create a new URLLister"""
        SGMLParser.__init__(self)
        # Collected href values, in the order the links were encountered.
        self.urls = []

    def read(self, uri):
        """Read a URI and return a list of files/directories

        The document at ``uri`` is fetched and fed to the parser. The
        returned list is ``self.urls`` itself, not a copy; it is only
        emptied by ``reset()``, so repeated calls accumulate links.
        """
        usock = urllib.urlopen(uri)
        try:
            self.feed(usock.read())
        finally:
            # Release the connection even if reading or parsing fails.
            usock.close()

        # Flush whatever the parser is still buffering so that a link at
        # the very end of the document is not lost.
        self.close()

        return self.urls

    def reset(self):
        """Reset state of URLLister"""
        SGMLParser.reset(self)
        self.urls = []

    def start_a(self, attrs):
        """Record the href attribute(s) of an opening ``<a>`` tag.

        ``attrs`` is a list of ``(name, value)`` pairs.
        """
        href = [v for k, v in attrs if k == 'href']
        self.urls.extend(href)