~awuerl/blitzortung-python/master

« back to all changes in this revision

Viewing changes to blitzortung/dataimport.py

  • Committer: Andreas Würl
  • Date: 2013-10-21 10:26:34 UTC
  • Revision ID: git-v1:7b4adbf6d36c2e6e0903e89f194d43db57096bf2
added Sphinx documentation
replaced inheritance of data provider with delegation

Show diffs side-by-side

added added

removed removed

Lines of Context:
5
5
@author: awuerl
6
6
 
7
7
"""
 
8
import os
8
9
import logging
9
10
import time
10
11
import urllib2
78
79
        return data_string
79
80
 
80
81
 
 
82
class BlitzortungDataUrl(object):
    """Build URLs below the protected data area of a blitzortung.org host.

    The class attributes provide the defaults that are substituted into
    ``target_url`` when the caller does not supply them explicitly.
    """

    # sub-domain used when no 'host_name' keyword argument is given
    default_host_name = 'data'
    # region number used when no 'region' keyword argument is given
    default_region = 1

    # base URL template; always ends with a slash
    target_url = 'http://%(host_name)s.blitzortung.org/Data_%(region)d/Protected/'

    def build_url(self, url_path, **kwargs):
        """Return the full URL for ``url_path``.

        :param url_path: path relative to the protected data area; it may
            itself contain ``%(name)s`` style placeholders
        :param kwargs: values for the placeholders of the URL template;
            ``host_name`` and ``region`` fall back to the class defaults
        :return: the formatted URL string
        """
        url_parameters = kwargs
        url_parameters.setdefault('host_name', self.default_host_name)
        url_parameters.setdefault('region', self.default_region)

        # URLs always use '/' as separator. os.path.join would insert the
        # platform separator (a backslash on Windows) and would drop the base
        # URL completely if url_path started with a slash, so join manually.
        url_template = self.target_url.rstrip('/') + '/' + url_path.lstrip('/')

        # Substitute after joining so placeholders in url_path are filled too.
        return url_template % url_parameters
 
98
 
 
99
@singleton
81
100
class BlitzortungDataProvider(object):
82
 
    host = 'http://data.blitzortung.org'
83
 
    target_url = host + '/Data_%(region)d/Protected/%(url_path)s'
84
101
    logger = logging.getLogger(__name__)
85
102
 
86
 
    def __init__(self, http_data_transport, data_transformer, url_path=None):
 
103
    @inject(http_data_transport=HttpDataTransport, data_transformer=BlitzortungDataTransformer)
 
104
    def __init__(self, http_data_transport, data_transformer):
87
105
        self.http_data_transport = http_data_transport
88
106
        self.data_transformer = data_transformer
89
 
        self.url_parameters = {
90
 
            'region': 1,
91
 
            'url_path': url_path if url_path else '',
92
 
        }
93
 
 
94
 
    def read_data(self, **kwargs):
95
 
        url_parameters = self.url_parameters.copy()
96
 
        url_parameters.update(kwargs)
97
 
 
98
 
        target_url = self.target_url % url_parameters
99
 
 
100
 
        response = self.http_data_transport.read_from_url(target_url)
 
107
 
 
108
    def read_text(self, target_url):
 
109
 
 
110
        return self.http_data_transport.read_from_url(target_url)
 
111
 
 
112
    def read_data(self, target_url, post_process=None):
 
113
        response = self.read_text(target_url)
101
114
 
102
115
        result = []
103
116
 
104
117
        if response:
105
 
            response = self.process(response)
 
118
            if post_process:
 
119
                response = post_process(response)
106
120
 
107
121
            for entry_text in response.split('\n'):
108
122
                entry_text = entry_text.strip()
115
129
 
116
130
        return result
117
131
 
118
 
    def process(self, data):
119
 
        return data
120
 
 
121
 
 
122
 
class BlitzortungStrokeUrlGenerator(object):
 
132
 
 
133
 
 
134
class BlitzortungHistoryUrlGenerator(object):
123
135
    url_path_minute_increment = 10
124
 
    url_path_format = 'Strokes/%Y/%m/%d/%H/%M.log'
 
136
    url_path_format = '%Y/%m/%d/%H/%M.log'
125
137
 
126
138
    def __init__(self):
127
139
        self.duration = datetime.timedelta(minutes=self.url_path_minute_increment)
132
144
 
133
145
 
134
146
@singleton
135
 
class StrokesBlitzortungDataProvider(BlitzortungDataProvider):
 
147
class StrokesBlitzortungDataProvider(object):
136
148
    logger = logging.getLogger(__name__)
137
149
 
138
 
    @inject(data_transport=HttpDataTransport, data_transformer=BlitzortungDataTransformer,
139
 
            url_path_generator=BlitzortungStrokeUrlGenerator, stroke_builder=blitzortung.builder.Stroke)
140
 
    def __init__(self, data_transport, data_transformer, url_path_generator, stroke_builder):
141
 
        super(StrokesBlitzortungDataProvider, self).__init__(data_transport, data_transformer, None)
 
150
    @inject(data_provider=BlitzortungDataProvider, data_url=BlitzortungDataUrl,
 
151
            url_path_generator=BlitzortungHistoryUrlGenerator, stroke_builder=blitzortung.builder.Stroke)
 
152
    def __init__(self, data_provider, data_url, url_path_generator, stroke_builder):
 
153
        self.data_provider = data_provider
 
154
        self.data_url = data_url
142
155
        self.url_path_generator = url_path_generator
143
156
        self.stroke_builder = stroke_builder
144
157
 
149
162
        for url_path in self.url_path_generator.get_url_paths(latest_stroke):
150
163
            initial_stroke_count = len(strokes_since)
151
164
            start_time = time.time()
152
 
            for stroke_data in self.read_data(region=region, url_path=url_path):
 
165
            target_url = self.data_url.build_url(os.path.join('Strokes', url_path), region=region)
 
166
            for stroke_data in self.data_provider.read_data(target_url):
153
167
                try:
154
168
                    stroke = self.stroke_builder.from_data(stroke_data).build()
155
169
                except blitzortung.builder.BuilderError as e:
163
177
                if latest_stroke < timestamp:
164
178
                    strokes_since.append(stroke)
165
179
            end_time = time.time()
166
 
            self.logger.debug("imported %d strokes in %.2fs from %s", len(strokes_since) - initial_stroke_count,
167
 
                              end_time - start_time, url_path)
 
180
            self.logger.debug("imported %d strokes for region %d in %.2fs from %s", len(strokes_since) - initial_stroke_count,
 
181
                              region, end_time - start_time, url_path)
168
182
        return strokes_since
169
183
 
170
184
 
175
189
 
176
190
 
177
191
@singleton
178
 
class StationsBlitzortungDataProvider(BlitzortungDataProvider):
 
192
class StationsBlitzortungDataProvider(object):
179
193
    logger = logging.getLogger(__name__)
180
194
 
181
 
    @inject(data_transport=HttpDataTransport, data_transformer=BlitzortungDataTransformer,
 
195
    @inject(data_provider=BlitzortungDataProvider, data_url=BlitzortungDataUrl,
182
196
            station_builder=blitzortung.builder.Station)
183
 
    def __init__(self, data_transport, data_transformer, station_builder):
184
 
        super(StationsBlitzortungDataProvider, self).__init__(data_transport, data_transformer, 'stations.txt.gz')
 
197
    def __init__(self, data_provider, data_url, station_builder):
 
198
        self.data_provider = data_provider
 
199
        self.data_url = data_url
185
200
        self.station_builder = station_builder
186
201
 
187
202
    def get_stations(self, region=1):
188
203
        current_stations = []
189
 
        for station_data in self.read_data(region=region):
 
204
        target_url = self.data_url.build_url('stations.txt.gz', region=region)
 
205
        for station_data in self.data_provider.read_data(target_url, post_process=self.post_process):
190
206
            try:
191
207
                current_stations.append(self.station_builder.from_data(station_data).build())
192
208
            except blitzortung.builder.BuilderError:
193
209
                self.logger.debug("error parsing station data '%s'" % station_data)
194
210
        return current_stations
195
211
 
196
 
    def process(self, data):
 
212
    def post_process(self, data):
197
213
        data = cStringIO.StringIO(data)
198
214
        return gzip.GzipFile(fileobj=data).read()
199
215
 
206
222
 
207
223
@singleton
208
224
class RawSignalsBlitzortungDataProvider(BlitzortungDataProvider):
209
 
    @inject(data_transport=HttpDataTransport, data_transformer=BlitzortungDataTransformer)
210
 
    def __init__(self, data_transport, data_transformer):
211
 
        super(RawSignalsBlitzortungDataProvider, self).__init__(data_transport, data_transformer,
212
 
                                                                'raw_data/%(station_id)s/%(hour)02d.log')
213
 
        # http://signals.blitzortung.org/Data_1/<station_id>/2013/09/28/20/00.log
 
225
    @inject(data_transport=HttpDataTransport, data_transformer=BlitzortungDataTransformer,
 
226
            waveform_builder=blitzortung.builder.RawWaveformEvent)
 
227
    def __init__(self, data_transport, data_transformer, waveform_builder):
 
228
        super(RawSignalsBlitzortungDataProvider, self).__init__(
 
229
            data_transport,
 
230
            data_transformer,
 
231
            base_url='http://signals.blitzortung.org/Data_%(region)d/%(station_id)'
 
232
        )
 
233
        self.waveform_builder = waveform_builder
214
234
 
215
235
    def set_station_id(self, station_id):
216
236
        self.set_url_parameter('station_id', station_id)
218
238
    def set_hour(self, hour):
219
239
        self.set_url_parameter('hour', hour)
220
240
 
 
241
    def get_raw_data_since(self, latest_data, region, station_id):
 
242
        self.logger.debug("import raw data since %s" % latest_data)
 
243
 
 
244
        raw_data = []
 
245
 
 
246
        for url_path in self.url_path_generator.get_url_paths(latest_data):
 
247
            data = self.read_data(url_path, region=region, station_id=station_id)
 
248
            for line in data.split('\n'):
 
249
                self.waveform_builder.from_string(line.strip())
 
250
                raw_data.append(self.waveform_builder.build())
 
251
 
 
252
        return raw_data
 
253
 
221
254
 
222
255
def raw():
223
256
    from __init__ import INJECTOR