~jstys-z/helioviewer.org/client5

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
"""HTTP data browser"""
import os
import posixpath
import urllib
from sgmllib import SGMLParser

from hvpull.browser.basebrowser import BaseDataBrowser

class HTTPDataBrowser(BaseDataBrowser):
    """Data browser that lists remote content over HTTP.

    Listings are built from the anchor (``<a href=...>``) links found on the
    HTML page served at a location.
    """

    def __init__(self, uri):
        """Create a new HTTPDataBrowser; setup is delegated to the base class."""
        BaseDataBrowser.__init__(self, uri)

    def get_directories(self, location):
        """Get a list of directories at the root of the dataprovider.

        We assume that these directories are in fact a list of instrument
        nicknames.  A link counts as a directory when it ends with "/".
        """
        return [url for url in self._query(location) if url.endswith("/")]

    def get_files(self, location, extension):
        """Get all the files that end with specified extension at the uri.

        ``extension`` is given without the leading dot (e.g. "jp2").
        """
        suffix = "." + extension
        return [url for url in self._query(location) if url.endswith(suffix)]

    def _query(self, location):
        """Get a list of files and folders at the specified remote location.

        Returns every relative link found at ``location``, joined onto
        ``location``.  Links starting with "/" (site-absolute paths) or "?"
        (query-string links) are skipped, as are empty hrefs.
        """
        # query the remote location for the list of files and subdirectories
        url_lister = URLLister()
        result = url_lister.read(location)
        # close() flushes any markup still buffered in the parser.  `result`
        # is the lister's own list, so it must be filtered only after this.
        url_lister.close()

        # Truthiness check first: an empty href would otherwise blow up on
        # indexing.  posixpath.join always uses "/", which is what a URL
        # needs regardless of the host operating system.
        return [posixpath.join(location, url) for url in result
                if url and not url.startswith(("/", "?"))]
    
class URLLister(SGMLParser):
    '''
    Created on Nov 1, 2011
    @author: Jack Ireland <jack.ireland@nasa.gov>
    copied from the original version of the download code.
    '''
    def __init__(self):
        """Create a new URLLister"""
        SGMLParser.__init__(self)
        self.urls = []

    def read(self, uri):
        """Read a URI and return a list of files/directories"""
        usock = urllib.urlopen(uri)
        self.feed(usock.read())
        usock.close()
        
        return self.urls
        
    def reset(self):
        """Reset state of URLLister"""
        SGMLParser.reset(self)
        self.urls = []

    def start_a(self, attrs):
        href = [v for k, v in attrs if k == 'href']
        if href:
            self.urls.extend(href)