1
"""HTTP data browser"""
4
from sgmllib import SGMLParser
5
from hvpull.browser.basebrowser import BaseDataBrowser
7
class HTTPDataBrowser(BaseDataBrowser):
    """Data browser that lists remote files and directories over HTTP.

    A listing is obtained by fetching the page at a location and collecting
    the ``href`` targets of its anchors (see ``_query``).
    """

    def __init__(self, uri):
        """Create a browser for the data provider at ``uri``."""
        BaseDataBrowser.__init__(self, uri)

    def get_directories(self, location):
        """Get a list of directories at the root of the dataprovider.

        Returns the entries listed at ``location`` whose URL ends with "/".
        These directories are assumed to name instruments -- TODO confirm
        against the data provider layout.
        """
        return [url for url in self._query(location) if url.endswith("/")]

    def get_files(self, location, extension):
        """Get all the files that end with specified extension at the uri.

        ``extension`` is given without its leading dot, e.g. ``"jp2"``.
        """
        suffix = "." + extension
        return [url for url in self._query(location) if url.endswith(suffix)]

    def _query(self, location):
        """Get a list of files and folders at the specified remote location.

        Returns a list of URLs, each formed by joining ``location`` with one
        relative link found on the page served at ``location``.
        """
        # Local import keeps this block self-contained. URLs always use "/"
        # as separator, whereas os.path.join would use the host OS separator.
        import posixpath

        # query the remote location for the list of files and subdirectories
        hrefs = URLLister().read(location)

        # Keep relative links only: drop absolute paths ("/...") and
        # query-string links ("?..."). Empty hrefs are dropped as well; they
        # would otherwise resolve back to the location itself.
        return [posixpath.join(location, href) for href in hrefs
                if href and not href.startswith(("/", "?"))]
33
class URLLister(SGMLParser):
35
Created on Nov 1, 2011
36
@author: Jack Ireland <jack.ireland@nasa.gov>
37
copied from the original version of the download code.
40
"""Create a new URLLister"""
41
SGMLParser.__init__(self)
45
"""Read a URI and return a list of files/directories"""
46
usock = urllib.urlopen(uri)
47
self.feed(usock.read())
53
"""Reset state of URLLister"""
54
SGMLParser.reset(self)
57
def start_a(self, attrs):
58
href = [v for k, v in attrs if k == 'href']
60
self.urls.extend(href)