1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
# -*- coding: iso-8859-1 -*-
"""
MoinMoin - LogFile package
This module supports buffered log reads, iterating forward and backward line-by-line, etc.
@copyright: 2005-2007 MoinMoin:ThomasWaldmann
@license: GNU GPL, see COPYING for details.
"""
from MoinMoin import log
logging = log.getLogger(__name__)
import os, codecs, errno
from MoinMoin import config, wikiutil
class LogError(Exception):
    """Root of the logfile exception hierarchy."""
class LogMissing(LogError):
    """Signals that the expected log file does not exist."""
class LineBuffer:
    """
    Reads lines from a file

    self.len number of lines in self.lines
    self.lines list of lines (unicode)
    self.offsets list of file offsets for each line. additionally the position
    after the last read line is stored into self.offsets[-1]
    """
    def __init__(self, file, offset, size, forward=True):
        """
        TODO: when this gets refactored, don't use "file" (is a builtin)

        @param file: open file object (must be opened in binary mode so the
                     byte-offset arithmetic below is correct)
        @param offset: position in file to start from
        @param size: aproximate number of bytes to read
        @param forward : read from offset on or from offset-size to offset
        @type forward: boolean
        """
        self.loglevel = logging.NOTSET
        if forward:
            begin = offset
            logging.log(self.loglevel, "LineBuffer.init: forward seek %d read %d" % (begin, size))
            file.seek(begin)
            # readlines(sizehint) reads whole lines totalling approximately
            # `size` bytes, so forward reads never split a line
            lines = file.readlines(size)
        else:
            if offset < 2 * size:
                # near the file start: read everything from the beginning so
                # we don't end up with a tiny (or incomplete-line) buffer
                begin = 0
                size = offset
            else:
                begin = offset - size
            logging.log(self.loglevel, "LineBuffer.init: backward seek %d read %d" % (begin, size))
            file.seek(begin)
            # keepends=True so line lengths include the line terminator and
            # the offset arithmetic below stays exact
            lines = file.read(size).splitlines(True)
            if begin != 0:
                # remove potentially incomplete first line
                # (begin probably landed mid-line; the dropped bytes belong
                # to a line fully contained in the preceding buffer)
                begin += len(lines[0])
                lines = lines[1:]
            # XXX check for min one line read
        linecount = len(lines)
        # now calculate the file offsets of all read lines
        # start with per-line byte lengths, then convert in-place to
        # absolute file offsets by accumulating from `begin`
        offsets = [len(line) for line in lines]
        offsets.append(0) # later this element will have the file offset after the last read line
        lengthpreviousline = 0
        offset = begin
        for i in xrange(linecount+1):
            offset += lengthpreviousline
            lengthpreviousline = offsets[i]
            offsets[i] = offset
        self.offsets = offsets
        self.len = linecount
        # Decode lines after offset in file is calculated
        # (decoding first would make len(line) count characters, not bytes)
        self.lines = [unicode(line, config.charset) for line in lines]
class LogFile:
"""
.filter: function that gets the values from .parser.
must return True to keep it or False to remove it
Overwrite .parser() and .add() to customize this class to special log files
"""
def __init__(self, filename, buffer_size=4096):
"""
@param filename: name of the log file
@param buffer_size: approx. size of one buffer in bytes
"""
self.loglevel = logging.NOTSET
self.__filename = filename
self.__buffer = None # currently used buffer, points to one of the following:
self.__buffer1 = None
self.__buffer2 = None
self.buffer_size = buffer_size
self.__lineno = 0
self.filter = None
def __iter__(self):
return self
def reverse(self):
""" yield log entries in reverse direction starting from last one
@rtype: iterator
"""
self.to_end()
while 1:
try:
logging.log(self.loglevel, "LogFile.reverse %s" % self.__filename)
result = self.previous()
except StopIteration:
return
yield result
def sanityCheck(self):
""" Check for log file write access.
@rtype: string (error message) or None
"""
if not os.access(self.__filename, os.W_OK):
return "The log '%s' is not writable!" % (self.__filename, )
return None
def __getattr__(self, name):
"""
generate some attributes when needed
"""
if name == "_LogFile__rel_index": # Python black magic: this is the real name of the __rel_index attribute
# starting iteration from begin
self.__buffer1 = LineBuffer(self._input, 0, self.buffer_size)
self.__buffer2 = LineBuffer(self._input,
self.__buffer1.offsets[-1],
self.buffer_size)
self.__buffer = self.__buffer1
self.__rel_index = 0
return 0
elif name == "_input":
try:
# Open the file (NOT using codecs.open, it breaks our offset calculation. We decode it later.).
# Use binary mode in order to retain \r - otherwise the offset calculation would fail.
self._input = file(self.__filename, "rb", )
except IOError, err:
if err.errno == errno.ENOENT: # "file not found"
# XXX workaround if edit-log does not exist: just create it empty
# if this workaround raises another error, we don't catch
# it, so the admin will see it.
f = file(self.__filename, "ab")
f.write('')
f.close()
self._input = file(self.__filename, "rb", )
else:
logging.error("logfile: %r IOERROR errno %d (%s)" % (self.__filename, err.errno, os.strerror(err.errno)))
raise
return self._input
elif name == "_output":
self._output = codecs.open(self.__filename, 'a', config.charset)
return self._output
else:
raise AttributeError(name)
def size(self):
""" Return log size in bytes
Return 0 if the file does not exist. Raises other OSError.
@return: size of log file in bytes
@rtype: Int
"""
try:
return os.path.getsize(self.__filename)
except OSError, err:
if err.errno == errno.ENOENT:
return 0
raise
def lines(self):
""" Return number of lines in the log file
Return 0 if the file does not exist. Raises other OSError.
Expensive for big log files - O(n)
@return: size of log file in lines
@rtype: Int
"""
try:
f = file(self.__filename, 'r')
try:
count = 0
for line in f:
count += 1
return count
finally:
f.close()
except (OSError, IOError), err:
if err.errno == errno.ENOENT:
return 0
raise
def date(self):
# ToDo check if we need this method
""" Return timestamp of log file in usecs """
try:
mtime = os.path.getmtime(self.__filename)
except OSError, err:
if err.errno == errno.ENOENT:
# This can happen on fresh wiki when building the index
# Usually the first request will create an event log
raise LogMissing(str(err))
raise
return wikiutil.timestamp2version(mtime)
def peek(self, lines):
""" Move position in file forward or backwards by "lines" count
It adjusts .__lineno if set.
This function is not aware of filters!
@param lines: number of lines, may be negative to move backward
@rtype: boolean
@return: True if moving more than to the beginning and moving
to the end or beyond
"""
logging.log(self.loglevel, "LogFile.peek %s" % self.__filename)
self.__rel_index += lines
while self.__rel_index < 0:
if self.__buffer is self.__buffer2:
if self.__buffer.offsets[0] == 0:
# already at the beginning of the file
self.__rel_index = 0
self.__lineno = 0
return True
else:
# change to buffer 1
self.__buffer = self.__buffer1
self.__rel_index += self.__buffer.len
else: # self.__buffer is self.__buffer1
if self.__buffer.offsets[0] == 0:
# already at the beginning of the file
self.__rel_index = 0
self.__lineno = 0
return True
else:
# load previous lines
self.__buffer2 = self.__buffer1
self.__buffer1 = LineBuffer(self._input,
self.__buffer.offsets[0],
self.buffer_size,
forward=False)
self.__buffer = self.__buffer1
self.__rel_index += self.__buffer.len
while self.__rel_index >= self.__buffer.len:
if self.__buffer is self.__buffer1:
# change to buffer 2
self.__rel_index -= self.__buffer.len
self.__buffer = self.__buffer2
else: # self.__buffer is self.__buffer2
# try to load next buffer
tmpbuff = LineBuffer(self._input,
self.__buffer.offsets[-1],
self.buffer_size)
if tmpbuff.len == 0:
# end of file
if self.__lineno is not None:
self.__lineno += (lines -
(self.__rel_index - self.__buffer.len))
self.__rel_index = self.__buffer.len # point to after last read line
return True
# shift buffers
self.__rel_index -= self.__buffer.len
self.__buffer1 = self.__buffer2
self.__buffer2 = tmpbuff
self.__buffer = self.__buffer2
if self.__lineno is not None:
self.__lineno += lines
return False
def __next(self):
"""get next line already parsed"""
if self.peek(0):
raise StopIteration
result = self.parser(self.__buffer.lines[self.__rel_index])
self.peek(1)
return result
def next(self):
"""get next line that passes through the filter
@return: next entry
raises StopIteration at file end
"""
result = None
while result is None:
while result is None:
logging.log(self.loglevel, "LogFile.next %s" % self.__filename)
result = self.__next()
if self.filter and not self.filter(result):
result = None
return result
def __previous(self):
"""get previous line already parsed"""
if self.peek(-1):
raise StopIteration
return self.parser(self.__buffer.lines[self.__rel_index])
def previous(self):
"""get previous line that passes through the filter
@return: previous entry
raises StopIteration at file begin
"""
result = None
while result is None:
while result is None:
logging.log(self.loglevel, "LogFile.previous %s" % self.__filename)
result = self.__previous()
if self.filter and not self.filter(result):
result = None
return result
def to_begin(self):
"""moves file position to the begin"""
logging.log(self.loglevel, "LogFile.to_begin %s" % self.__filename)
if self.__buffer1 is None or self.__buffer1.offsets[0] != 0:
self.__buffer1 = LineBuffer(self._input,
0,
self.buffer_size)
self.__buffer2 = LineBuffer(self._input,
self.__buffer1.offsets[-1],
self.buffer_size)
self.__buffer = self.__buffer1
self.__rel_index = 0
self.__lineno = 0
def to_end(self):
"""moves file position to the end"""
logging.log(self.loglevel, "LogFile.to_end %s" % self.__filename)
self._input.seek(0, 2) # to end of file
size = self._input.tell()
if self.__buffer2 is None or size > self.__buffer2.offsets[-1]:
self.__buffer2 = LineBuffer(self._input,
size,
self.buffer_size,
forward=False)
self.__buffer1 = LineBuffer(self._input,
self.__buffer2.offsets[0],
self.buffer_size,
forward=False)
self.__buffer = self.__buffer2
self.__rel_index = self.__buffer2.len
self.__lineno = None
def position(self):
""" Return the current file position
This can be converted into a String using back-ticks and then be rebuild.
For this plain file implementation position is an Integer.
"""
return self.__buffer.offsets[self.__rel_index]
def seek(self, position, line_no=None):
""" moves file position to an value formerly gotten from .position().
To enable line counting line_no must be provided.
.seek is much more efficient for moving long distances than .peek.
raises ValueError if position is invalid
"""
logging.log(self.loglevel, "LogFile.seek %s pos %d" % (self.__filename, position))
if self.__buffer1:
logging.log(self.loglevel, "b1 %r %r" % (self.__buffer1.offsets[0], self.__buffer1.offsets[-1]))
if self.__buffer2:
logging.log(self.loglevel, "b2 %r %r" % (self.__buffer2.offsets[0], self.__buffer2.offsets[-1]))
if self.__buffer1 and self.__buffer1.offsets[0] <= position < self.__buffer1.offsets[-1]:
# position is in .__buffer1
self.__rel_index = self.__buffer1.offsets.index(position)
self.__buffer = self.__buffer1
elif self.__buffer2 and self.__buffer2.offsets[0] <= position < self.__buffer2.offsets[-1]:
# position is in .__buffer2
self.__rel_index = self.__buffer2.offsets.index(position)
self.__buffer = self.__buffer2
elif self.__buffer1 and self.__buffer1.offsets[-1] == position:
# we already have one buffer directly before where we want to go
self.__buffer2 = LineBuffer(self._input,
position,
self.buffer_size)
self.__buffer = self.__buffer2
self.__rel_index = 0
elif self.__buffer2 and self.__buffer2.offsets[-1] == position:
# we already have one buffer directly before where we want to go
self.__buffer1 = self.__buffer2
self.__buffer2 = LineBuffer(self._input,
position,
self.buffer_size)
self.__buffer = self.__buffer2
self.__rel_index = 0
else:
# load buffers around position
self.__buffer1 = LineBuffer(self._input,
position,
self.buffer_size,
forward=False)
self.__buffer2 = LineBuffer(self._input,
position,
self.buffer_size)
self.__buffer = self.__buffer2
self.__rel_index = 0
# XXX test for valid position
self.__lineno = line_no
def line_no(self):
"""@return: the current line number or None if line number is unknown"""
return self.__lineno
def calculate_line_no(self):
""" Calculate the current line number from buffer offsets
If line number is unknown it is calculated by parsing the whole file.
This may be expensive.
"""
self._input.seek(0, 0)
lines = self._input.read(self.__buffer.offsets[self.__rel_index])
self.__lineno = len(lines.splitlines())
return self.__lineno
def parser(self, line):
"""
@param line: line as read from file
@return: parsed line or None on error
Converts the line from file to program representation
This implementation uses TAB separated strings.
This method should be overwritten by the sub classes.
"""
return line.split("\t")
def add(self, *data):
"""
add line to log file
This implementation save the values as TAB separated strings.
This method should be overwritten by the sub classes.
"""
line = "\t".join(data)
self._add(line)
def _add(self, line):
"""
@param line: flat line
@type line: String
write on entry in the log file
"""
if line is not None:
if line[-1] != '\n':
line += '\n'
self._output.write(line)
self._output.close() # does this maybe help against the sporadic fedora wikis 160 \0 bytes in the edit-log?
del self._output # re-open the output file automagically
|