3
3
MoinMoin - LogFile package
5
@copyright: 2005 by Thomas Waldmann (MoinMoin:ThomasWaldmann)
5
This module supports buffered log reads, iterating forward and backward line-by-line, etc.
7
@copyright: 2005-2007 MoinMoin:ThomasWaldmann
6
8
@license: GNU GPL, see COPYING for details.
9
from MoinMoin.util import pysupport
11
logfiles = pysupport.getPackageModules(__file__)
11
from MoinMoin import log
12
logging = log.getLogger(__name__)
14
import os, codecs, errno
15
from MoinMoin import config, wikiutil
17
class LogError(Exception):
    """ Base class for log errors.

    Catching this also catches every error type derived from it
    (e.g. LogMissing).
    """
20
class LogMissing(LogError):
    """ Raised when the log is missing (the log file does not exist) """
26
Reads lines from a file
27
self.len number of lines in self.lines
28
self.lines list of lines (unicode)
29
self.offsets list of file offsets for each line. additionally the position
30
after the last read line is stored into self.offsets[-1]
32
def __init__(self, file, offset, size, forward=True):
35
TODO: when this gets refactored, don't use "file" (is a builtin)
37
@param file: open file object
38
@param offset: position in file to start from
39
@param size: approximate number of bytes to read
40
@param forward: if True, read from offset onwards; if False, read from offset-size to offset
41
@type forward: boolean
43
self.loglevel = logging.NOTSET
46
logging.log(self.loglevel, "LineBuffer.init: forward seek %d read %d" % (begin, size))
48
lines = file.readlines(size)
55
logging.log(self.loglevel, "LineBuffer.init: backward seek %d read %d" % (begin, size))
57
lines = file.read(size).splitlines(True)
59
# remove potentially incomplete first line
60
begin += len(lines[0])
62
# XXX check for min one line read
64
linecount = len(lines)
66
# now calculate the file offsets of all read lines
67
offsets = [len(line) for line in lines]
68
offsets.append(0) # later this element will have the file offset after the last read line
70
lengthpreviousline = 0
72
for i in xrange(linecount+1):
73
offset += lengthpreviousline
74
lengthpreviousline = offsets[i]
77
self.offsets = offsets
79
# Decode lines after offset in file is calculated
80
self.lines = [unicode(line, config.charset) for line in lines]
85
.filter: function that gets the values from .parser.
86
must return True to keep it or False to remove it
87
Override .parser() and .add() to customize this class for special log files
90
def __init__(self, filename, buffer_size=4096):
92
@param filename: name of the log file
93
@param buffer_size: approx. size of one buffer in bytes
95
self.loglevel = logging.NOTSET
96
self.__filename = filename
97
self.__buffer = None # currently used buffer, points to one of the following:
100
self.buffer_size = buffer_size
108
""" yield log entries in reverse direction starting from last one
115
logging.log(self.loglevel, "LogFile.reverse %s" % self.__filename)
116
result = self.previous()
117
except StopIteration:
121
def sanityCheck(self):
122
""" Check for log file write access.
124
@rtype: string (error message) or None
126
if not os.access(self.__filename, os.W_OK):
127
return "The log '%s' is not writable!" % (self.__filename, )
130
def __getattr__(self, name):
132
generate some attributes when needed
134
if name == "_LogFile__rel_index": # Python black magic: this is the real name of the __rel_index attribute
135
# starting iteration from begin
136
self.__buffer1 = LineBuffer(self._input, 0, self.buffer_size)
137
self.__buffer2 = LineBuffer(self._input,
138
self.__buffer1.offsets[-1],
140
self.__buffer = self.__buffer1
143
elif name == "_input":
145
# Open the file (NOT using codecs.open, it breaks our offset calculation. We decode it later.).
146
# Use binary mode in order to retain \r - otherwise the offset calculation would fail.
147
self._input = file(self.__filename, "rb", )
149
if err.errno == errno.ENOENT: # "file not found"
150
# XXX workaround if edit-log does not exist: just create it empty
151
# if this workaround raises another error, we don't catch
152
# it, so the admin will see it.
153
f = file(self.__filename, "ab")
156
self._input = file(self.__filename, "rb", )
158
logging.error("logfile: %r IOERROR errno %d (%s)" % (self.__filename, err.errno, os.strerror(err.errno)))
161
elif name == "_output":
162
self._output = codecs.open(self.__filename, 'a', config.charset)
165
raise AttributeError(name)
168
""" Return log size in bytes
170
Return 0 if the file does not exist. Raises other OSError.
172
@return: size of log file in bytes
176
return os.path.getsize(self.__filename)
178
if err.errno == errno.ENOENT:
183
""" Return number of lines in the log file
185
Return 0 if the file does not exist. Raises other OSError.
187
Expensive for big log files - O(n)
189
@return: size of log file in lines
193
f = file(self.__filename, 'r')
201
except (OSError, IOError), err:
202
if err.errno == errno.ENOENT:
207
# ToDo check if we need this method
208
""" Return timestamp of log file in usecs """
210
mtime = os.path.getmtime(self.__filename)
212
if err.errno == errno.ENOENT:
213
# This can happen on fresh wiki when building the index
214
# Usually the first request will create an event log
215
raise LogMissing(str(err))
217
return wikiutil.timestamp2version(mtime)
219
def peek(self, lines):
220
""" Move position in file forward or backwards by "lines" count
222
It adjusts .__lineno if set.
223
This function is not aware of filters!
225
@param lines: number of lines, may be negative to move backward
227
@return: True if moving more than to the beginning and moving
230
logging.log(self.loglevel, "LogFile.peek %s" % self.__filename)
231
self.__rel_index += lines
232
while self.__rel_index < 0:
233
if self.__buffer is self.__buffer2:
234
if self.__buffer.offsets[0] == 0:
235
# already at the beginning of the file
241
self.__buffer = self.__buffer1
242
self.__rel_index += self.__buffer.len
243
else: # self.__buffer is self.__buffer1
244
if self.__buffer.offsets[0] == 0:
245
# already at the beginning of the file
250
# load previous lines
251
self.__buffer2 = self.__buffer1
252
self.__buffer1 = LineBuffer(self._input,
253
self.__buffer.offsets[0],
256
self.__buffer = self.__buffer1
257
self.__rel_index += self.__buffer.len
259
while self.__rel_index >= self.__buffer.len:
260
if self.__buffer is self.__buffer1:
262
self.__rel_index -= self.__buffer.len
263
self.__buffer = self.__buffer2
264
else: # self.__buffer is self.__buffer2
265
# try to load next buffer
266
tmpbuff = LineBuffer(self._input,
267
self.__buffer.offsets[-1],
271
if self.__lineno is not None:
272
self.__lineno += (lines -
273
(self.__rel_index - self.__buffer.len))
274
self.__rel_index = self.__buffer.len # point to after last read line
277
self.__rel_index -= self.__buffer.len
278
self.__buffer1 = self.__buffer2
279
self.__buffer2 = tmpbuff
280
self.__buffer = self.__buffer2
282
if self.__lineno is not None:
283
self.__lineno += lines
287
"""get next line already parsed"""
290
result = self.parser(self.__buffer.lines[self.__rel_index])
295
"""get next line that passes through the filter
297
raises StopIteration at file end
300
while result is None:
301
while result is None:
302
logging.log(self.loglevel, "LogFile.next %s" % self.__filename)
303
result = self.__next()
304
if self.filter and not self.filter(result):
308
def __previous(self):
309
"""get previous line already parsed"""
312
return self.parser(self.__buffer.lines[self.__rel_index])
315
"""get previous line that passes through the filter
316
@return: previous entry
317
raises StopIteration at file begin
320
while result is None:
321
while result is None:
322
logging.log(self.loglevel, "LogFile.previous %s" % self.__filename)
323
result = self.__previous()
324
if self.filter and not self.filter(result):
329
"""moves file position to the beginning"""
330
logging.log(self.loglevel, "LogFile.to_begin %s" % self.__filename)
331
if self.__buffer1 is None or self.__buffer1.offsets[0] != 0:
332
self.__buffer1 = LineBuffer(self._input,
335
self.__buffer2 = LineBuffer(self._input,
336
self.__buffer1.offsets[-1],
338
self.__buffer = self.__buffer1
343
"""moves file position to the end"""
344
logging.log(self.loglevel, "LogFile.to_end %s" % self.__filename)
345
self._input.seek(0, 2) # to end of file
346
size = self._input.tell()
347
if self.__buffer2 is None or size > self.__buffer2.offsets[-1]:
348
self.__buffer2 = LineBuffer(self._input,
353
self.__buffer1 = LineBuffer(self._input,
354
self.__buffer2.offsets[0],
357
self.__buffer = self.__buffer2
358
self.__rel_index = self.__buffer2.len
362
""" Return the current file position
364
This can be converted into a String using back-ticks and then be rebuilt.
365
For this plain file implementation position is an Integer.
367
return self.__buffer.offsets[self.__rel_index]
369
def seek(self, position, line_no=None):
370
""" moves file position to a value previously obtained from .position().
371
To enable line counting line_no must be provided.
372
.seek is much more efficient for moving long distances than .peek.
373
raises ValueError if position is invalid
375
logging.log(self.loglevel, "LogFile.seek %s pos %d" % (self.__filename, position))
377
logging.log(self.loglevel, "b1 %r %r" % (self.__buffer1.offsets[0], self.__buffer1.offsets[-1]))
379
logging.log(self.loglevel, "b2 %r %r" % (self.__buffer2.offsets[0], self.__buffer2.offsets[-1]))
380
if self.__buffer1 and self.__buffer1.offsets[0] <= position < self.__buffer1.offsets[-1]:
381
# position is in .__buffer1
382
self.__rel_index = self.__buffer1.offsets.index(position)
383
self.__buffer = self.__buffer1
384
elif self.__buffer2 and self.__buffer2.offsets[0] <= position < self.__buffer2.offsets[-1]:
385
# position is in .__buffer2
386
self.__rel_index = self.__buffer2.offsets.index(position)
387
self.__buffer = self.__buffer2
388
elif self.__buffer1 and self.__buffer1.offsets[-1] == position:
389
# we already have one buffer directly before where we want to go
390
self.__buffer2 = LineBuffer(self._input,
393
self.__buffer = self.__buffer2
395
elif self.__buffer2 and self.__buffer2.offsets[-1] == position:
396
# we already have one buffer directly before where we want to go
397
self.__buffer1 = self.__buffer2
398
self.__buffer2 = LineBuffer(self._input,
401
self.__buffer = self.__buffer2
404
# load buffers around position
405
self.__buffer1 = LineBuffer(self._input,
409
self.__buffer2 = LineBuffer(self._input,
412
self.__buffer = self.__buffer2
414
# XXX test for valid position
415
self.__lineno = line_no
418
"""@return: the current line number or None if line number is unknown"""
421
def calculate_line_no(self):
422
""" Calculate the current line number from buffer offsets
424
If line number is unknown it is calculated by parsing the whole file.
425
This may be expensive.
427
self._input.seek(0, 0)
428
lines = self._input.read(self.__buffer.offsets[self.__rel_index])
429
self.__lineno = len(lines.splitlines())
432
def parser(self, line):
    """ Convert a line from the log file to its program representation.

    This implementation splits the line at TAB characters; nothing else
    (e.g. a trailing line ending) is stripped from the line.
    This method should be overridden by subclasses.

    @param line: line as read from file
    @return: parsed line (here: the list of TAB separated fields);
             overriding implementations may return None on error
    """
    return line.split("\t")
442
def add(self, *data):
445
This implementation saves the values as TAB separated strings.
446
This method should be overridden by subclasses.
448
line = "\t".join(data)
451
def _add(self, line):
453
@param line: flat line
455
write one entry to the log file
460
self._output.write(line)
461
self._output.close() # does this maybe help against the sporadic fedora wikis 160 \0 bytes in the edit-log?
462
del self._output # re-open the output file automagically