1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
|
"""HTTP data browser"""
import os
import posixpath
import urllib
from sgmllib import SGMLParser

from hvpull.browser.basebrowser import BaseDataBrowser
class HTTPDataBrowser(BaseDataBrowser):
    """Browse a data provider that is exposed as HTTP directory listings.

    A listing is obtained by fetching a location and collecting the ``href``
    of every anchor tag found in the returned page (see ``_query``).
    """

    def __init__(self, uri):
        """Create a browser for the data provider located at ``uri``."""
        BaseDataBrowser.__init__(self, uri)

    def get_directories(self, location):
        """Get a list of directories at the root of the dataprovider.

        We assume that these directories are in fact a list of instrument
        nicknames.

        Returns a list with the joined URL of every link found at
        ``location`` that ends with "/".
        """
        return [url for url in self._query(location) if url.endswith("/")]

    def get_files(self, location, extension):
        """Get all the files that end with specified extension at the uri.

        ``extension`` is given without the leading dot (e.g. "jp2").
        Returns a list with the joined URL of every matching link.
        """
        suffix = "." + extension
        return [url for url in self._query(location) if url.endswith(suffix)]

    def _query(self, location):
        """Get a list of files and folders at the specified remote location.

        Every usable ``href`` found at ``location`` is joined onto
        ``location`` and returned, in document order, as a list of strings.
        """
        # query the remote location for the list of files and subdirectories
        url_lister = URLLister()
        hrefs = url_lister.read(location)
        url_lister.close()

        # Ignore links that do not name an entry below ``location``:
        #   * empty hrefs (joining them would yield ``location`` itself, and
        #     indexing them used to raise IndexError),
        #   * hrefs starting with "/" (site-absolute paths),
        #   * hrefs starting with "?" (query-string links; presumably the
        #     column-sort links of server-generated index pages).
        entries = [href for href in hrefs
                   if href and not href.startswith(("/", "?"))]

        # URLs always use "/" as separator, so join with posixpath rather
        # than the platform-dependent os.path.
        return [posixpath.join(location, href) for href in entries]
class URLLister(SGMLParser):
'''
Created on Nov 1, 2011
@author: Jack Ireland <jack.ireland@nasa.gov>
copied from the original version of the download code.
'''
def __init__(self):
"""Create a new URLLister"""
SGMLParser.__init__(self)
self.urls = []
def read(self, uri):
"""Read a URI and return a list of files/directories"""
usock = urllib.urlopen(uri)
self.feed(usock.read())
usock.close()
return self.urls
def reset(self):
"""Reset state of URLLister"""
SGMLParser.reset(self)
self.urls = []
def start_a(self, attrs):
href = [v for k, v in attrs if k == 'href']
if href:
self.urls.extend(href)
|