1
# Found on a Russian Zope mailing list, and modified to fix bugs in parsing
2
# the magic file and string making
3
# -- Daniel Berlin <dberlin@dberlin.org>
4
import sys, struct, time, re, exceptions, pprint, stat, os, pwd, grp
9
# _magic='/usr/share/magic.mime'
10
# Magic database path used as the default file name when none is supplied.
_magic = '/usr/share/magic.mime'


def _ldate_adjust(x):
    """Re-interpret the UTC broken-down form of timestamp *x* as local time.

    Serves as the post-unpack adjustment callable for the ``ldate`` family
    of types.
    """
    utc_fields = time.gmtime(x)
    return time.mktime(utc_fields)


# Buffer size handed to open(): 128K should be enough...
BUFFER_SIZE = 128 * 1024
17
class MagicError(Exception):
    """Root of this module's exception hierarchy.

    Derives from the built-in ``Exception`` directly.  It is the very same
    class as ``exceptions.Exception`` but does not require the ``exceptions``
    module, which no longer exists in Python 3.
    """
19
def _handle(fmt='@x',adj=None):
    """Build a type-table entry for the struct format *fmt*.

    Returns a ``(format, size, adjust)`` tuple, where *size* is
    ``struct.calcsize(fmt)`` and *adjust* is handed back untouched.
    """
    width = struct.calcsize(fmt)
    return (fmt, width, adj)
22
# 'byte':_handle('@b'),
24
'ubyte':_handle('@B'),
26
'string':('s',0,None),
27
'pstring':_handle('p'),
29
# 'short':_handle('@h'),
30
# 'beshort':_handle('>h'),
31
# 'leshort':_handle('<h'),
32
'short':_handle('@H'),
33
'beshort':_handle('>H'),
34
'leshort':_handle('<H'),
35
'ushort':_handle('@H'),
36
'ubeshort':_handle('>H'),
37
'uleshort':_handle('<H'),
40
'belong':_handle('>l'),
41
'lelong':_handle('<l'),
42
'ulong':_handle('@L'),
43
'ubelong':_handle('>L'),
44
'ulelong':_handle('<L'),
47
'bedate':_handle('>l'),
48
'ledate':_handle('<l'),
49
'ldate':_handle('=l',_ldate_adjust),
50
'beldate':_handle('>l',_ldate_adjust),
51
'leldate':_handle('<l',_ldate_adjust),
60
if _mew_cnt % 64 == 0 : sys.stderr.write( '\n' )
61
sys.stderr.write( '.' )
63
sys.stderr.write( '\b'+x )
75
def read_asciiz(file,size=None,pos=None):
82
s = [file.read( size ).split('\0')[0]]
86
if (not c) or (ord(c)==0) or (c=='\n') : break
92
if v[-1:] in 'lL' : v = v[:-1]
99
for c in range(ord('a'),ord('z')+1) :
100
try : e = eval('"\\%c"' % chr(c))
101
except ValueError : pass
102
else : _cmap[chr(c)] = e
108
return eval( '"'+s.replace('"','\\"')+'"')
110
class MagicTestError(MagicError):
    """Raised for a faulty magic test: unknown modifier or bad subtest."""
113
def __init__(self,offset,mtype,test,message,line=None,level=None):
114
self.line, self.level = line, level
121
self.offset, self.type, self.test, self.message = \
122
offset,mtype,test,message
123
if self.mtype == 'true' : return # XXX hack to enable level skips
124
if test[-1:]=='\\' and test[-2:]!='\\\\' :
125
self.test += 'n' # looks like someone wanted EOL to match?
126
if mtype[:6]=='string' :
127
if '/' in mtype : # for strings
128
self.type, self.smod = \
129
mtype[:mtype.find('/')], mtype[mtype.find('/')+1:]
132
if nm in mtype : # for integer-based
133
self.nmod, self.type, self.mask = (
135
mtype[:mtype.find(nm)],
136
# convert mask to int, autodetect base
137
int( mtype[mtype.find(nm)+1:], 0 )
140
self.struct, self.size, self.cast = KnownTypes[ self.type ]
142
return '%s %s %s %s' % (
143
self.offset, self.mtype, self.mtest, self.message
146
return 'MagicTest(%s,%s,%s,%s,line=%s,level=%s,subtests=\n%s%s)' % (
147
`self.offset`, `self.mtype`, `self.mtest`, `self.message`,
148
`self.line`, `self.level`,
149
'\t'*self.level, pprint.pformat(self.subtests)
155
if type(file) == type('x') :
156
file = open( file, 'r', BUFFER_SIZE )
159
# saved_pos = file.tell()
160
if self.mtype != 'true' :
161
data = self.read(file)
165
if self.check( data ) :
166
result = self.message+' '
167
if has_format( result ) : result %= data
168
for test in self.subtests :
170
if m is not None : result += m
171
return make_string( result )
176
# file.seek( saved_pos, 0 )
177
def get_mod_and_value(self):
178
if self.type[-6:] == 'string' :
179
# "something like\tthis\n"
180
if self.test[0] in '=<>' :
181
mod, value = self.test[0], make_string( self.test[1:] )
183
mod, value = '=', make_string( self.test )
185
if self.test[0] in '=<>&^' :
186
mod, value = self.test[0], a2i(self.test[1:])
187
elif self.test[0] == 'x':
191
mod, value = '=', a2i(self.test)
195
file.seek( self.offset(file), 0 ) # SEEK_SET
199
# XXX self.size might be 0 here...
201
# this is an ASCIIZ string...
203
if self.test != '>\\0' : # magic's hack for string read...
204
value = self.get_mod_and_value()[1]
205
# A lone NUL as the test value must give size=None so that read_asciiz falls
# back to its own size=None default.  The previous `cond and None or x`
# form could never produce None, because None is falsy.
size = None if value == '\0' else len(value)
206
rdata = data = read_asciiz( file, size=size )
208
rdata = file.read( self.size )
209
if not rdata or (len(rdata)!=self.size) : return None
210
data = struct.unpack( self.struct, rdata )[0] # XXX hack??
212
print >>sys.stderr, self
213
print >>sys.stderr, '@%s struct=%s size=%d rdata=%s' % (
214
self.offset, `self.struct`, self.size,`rdata`)
217
if self.cast : data = self.cast( data )
220
if self.nmod == '&' : data &= self.mask
221
elif self.nmod == '+' : data += self.mask
222
elif self.nmod == '-' : data -= self.mask
223
else: raise MagicTestError(self.nmod)
225
print >>sys.stderr,'data=%s nmod=%s mask=%s' % (
226
`data`, `self.nmod`, `self.mask`
230
def check(self,data):
232
if self.mtype == 'true' :
233
return '' # not None !
234
mod, value = self.get_mod_and_value()
235
if self.type[-6:] == 'string' :
236
# "something like\tthis\n"
239
if 'b' in self.smod : # all blanks are optional
240
xdata = ''.join( data.split() )
241
value = ''.join( value.split() )
242
if 'c' in self.smod : # case-insensitive: both sides are upper-cased below
243
xdata = xdata.upper()
244
value = value.upper()
245
# if 'B' in self.smod : # compact blanks
246
### XXX sorry, i don't understand this :-(
247
# data = ' '.join( data.split() )
248
# if ' ' not in data : return None
252
if mod == '=' : result = data == value
253
elif mod == '<' : result = data < value
254
elif mod == '>' : result = data > value
255
elif mod == '&' : result = data & value
256
elif mod == '^' : result = (data & (~value)) == 0
257
elif mod == 'x' : result = 1
258
else : raise MagicTestError(self.test)
260
zdata, zval = `data`, `value`
261
if self.mtype[-6:]!='string' :
262
try: zdata, zval = hex(data), hex(value)
263
except: zdata, zval = `data`, `value`
264
if 0 : print >>sys.stderr, '%s @%s %s:%s %s %s => %s (%s)' % (
265
'>'*self.level, self.offset,
266
zdata, self.mtype, `mod`, zval, `result`,
271
print >>sys.stderr,'mtype=%s data=%s mod=%s value=%s' % (
272
`self.mtype`, `data`, `mod`, `value`
276
if not isinstance(mt,MagicTest) :
277
raise MagicTestError((mt,'incorrect subtest type %s'%(type(mt),)))
278
if mt.level == self.level+1 :
279
self.subtests.append( mt )
281
self.subtests[-1].add( mt )
282
elif mt.level > self.level+1 :
283
# it's possible to get level 3 just after level 1 !!! :-(
284
level = self.level + 1
285
while level < mt.level :
286
xmt = MagicTest(None,'true','x','',line=self.line,level=level)
290
self.add( mt ) # retry...
292
raise MagicTestError((mt,'incorrect subtest level %s'%(`mt.level`,)))
294
return self.subtests[-1]
297
class OffsetError(MagicError):
    """Raised from Offset construction; carries the repr of the offset string."""
300
pos_format = {'b':'<B','B':'>B','s':'<H','S':'>H','l':'<I','L':'>I',}
301
pattern0 = re.compile(r''' # mere offset
303
&? # possible ampersand
305
| [1-9]{1,1}[0-9]* # decimal
312
pattern1 = re.compile(r''' # indirect offset
314
(?P<base>&?0 # just zero
315
|&?[1-9]{1,1}[0-9]* # decimal
320
\. # this dot might be alone
321
[BSL]? # one of this chars in either case
326
(?P<off>0 # just zero
327
|[1-9]{1,1}[0-9]* # decimal
333
def __init__(self,s):
337
self.base = self.type = self.sign = self.offs = None
338
m = Offset.pattern0.match( s )
339
if m : # just a number
341
self.relative, self.value = 1, int( s[1:], 0 )
343
self.value = int( s, 0 )
345
m = Offset.pattern1.match( s )
346
if m : # real indirect offset
348
self.base = m.group('base')
349
if self.base[0] == '&' :
350
self.relative, self.base = 1, int( self.base[1:], 0 )
352
self.base = int( self.base, 0 )
353
if m.group('type') : self.type = m.group('type')[1:]
354
self.sign = m.group('sign')
355
if m.group('off') : self.offs = int( m.group('off'), 0 )
356
if self.sign == '-' : self.offs = 0 - self.offs
358
print >>sys.stderr, '$$', m.groupdict()
361
raise OffsetError(`s`)
362
def __call__(self,file=None):
363
if self.value is not None : return self.value
366
if not self.relative : file.seek( self.offset, 0 )
367
frmt = Offset.pos_format.get( self.type, 'I' )
368
size = struct.calcsize( frmt )
369
# struct.unpack always returns a tuple; take its single element so that the
# integer addition of self.offs that follows works instead of raising TypeError.
data = struct.unpack( frmt, file.read( size ) )[0]
370
if self.offs : data += self.offs
374
def __str__(self):
    """Return the stored ``source`` attribute unchanged."""
    text = self.source
    return text
375
def __repr__(self):
    """Return ``Offset(<repr of source>)`` for debugging output.

    Uses ``repr()`` instead of backquotes: the result is identical, but the
    backquote syntax is deprecated and rejected by Python 3.
    """
    return 'Offset(%s)' % repr(self.source)
378
class MagicFileError(MagicError):
    """MagicError subclass, presumably for magic-file level failures.

    NOTE(review): no raise site is visible in this part of the file.
    """
381
def __init__(self,filename=_magic):
385
self.load( filename )
386
self.ack_tests = None
387
self.nak_tests = None
390
def load(self,filename=None):
391
self.open( filename )
394
def open(self,filename=None):
396
if filename is not None :
397
self.filename = filename
398
self.file = open( self.filename, 'r', BUFFER_SIZE )
405
for line in self.file.xreadlines() :
407
if not line or line[0]=='#' : continue
408
line = line.lstrip().rstrip('\r\n')
409
if not line or line[0]=='#' : continue
411
x = self.parse_line( line )
413
print >>sys.stderr, '#[%04d]#'%line_no, line
416
print >>sys.stderr, '###[%04d]###'%line_no, line
418
self.total_tests += 1
419
level, offset, mtype, test, message = x
420
new_test = MagicTest(offset,mtype,test,message,
421
line=line_no,level=level)
424
self.tests.append( new_test )
426
self.tests[-1].add( new_test )
429
print >>sys.stderr, 'total tests=%s' % (
432
print >>sys.stderr, 'level=%s' % (
435
print >>sys.stderr, 'tests=%s' % (
436
pprint.pformat(self.tests),
440
while self.tests[-1].level > 0 :
442
def parse_line(self,line):
443
# print >>sys.stderr, 'line=[%s]' % line
444
if (not line) or line[0]=='#' : return None
446
offset = mtype = test = message = ''
448
# get optional level (count leading '>')
449
while line and line[0]=='>' :
450
line, level = line[1:], level+1
452
while line and not line[0].isspace() :
453
offset, line = offset+line[0], line[1:]
455
offset = Offset(offset)
457
print >>sys.stderr, 'line=[%s]' % line
464
last_c, c, line = c, line[0], line[1:]
465
if last_c!='\\' and c.isspace() :
466
break # unescaped space - end of field
470
c = None # don't fuck my brain with sequential backslashes
476
last_c, c, line = c, line[0], line[1:]
477
if last_c!='\\' and c.isspace() :
478
break # unescaped space - end of field
482
c = None # don't fuck my brain with sequential backslashes
487
if mime and line.find("\t") != -1:
488
message=line[0:line.find("\t")]
490
# print '>>', level, offset, mtype, test, message
491
return level, offset, mtype, test, message
492
def detect(self,file):
496
for test in self.tests :
497
message = test.run( file )
500
answers.append( message )
504
return '; '.join( answers )
509
return pwd.getpwuid( uid )[0]
515
return grp.getgrgid( gid )[0]
519
def get_file_type(fname,follow):
523
st = os.lstat( fname ) # stat that entry, don't follow links!
524
except os.error, why :
527
if stat.S_ISLNK(st[stat.ST_MODE]) :
530
lnk = os.readlink( fname )
537
st = os.stat( fname )
538
except os.error, why :
539
return "can't stat `%s' (%s)." % (why.filename,why.strerror)
541
dmaj, dmin = (st.st_rdev>>8)&0x0FF, st.st_rdev&0x0FF
544
elif stat.S_ISSOCK(st.st_mode) : t = 'socket'
545
elif stat.S_ISLNK (st.st_mode) : t = follow and 'symbolic link' or t
546
elif stat.S_ISREG (st.st_mode) : t = 'file'
547
elif stat.S_ISBLK (st.st_mode) : t = 'block special (%d/%d)'%(dmaj,dmin)
548
elif stat.S_ISDIR (st.st_mode) : t = 'directory'
549
elif stat.S_ISCHR (st.st_mode) : t = 'character special (%d/%d)'%(dmaj,dmin)
550
elif stat.S_ISFIFO(st.st_mode) : t = 'pipe'
551
else: t = '<unknown>'
553
if st.st_mode & stat.S_ISUID :
554
t = 'setuid(%d=%s) %s'%(st.st_uid,username(st.st_uid),t)
555
if st.st_mode & stat.S_ISGID :
556
t = 'setgid(%d=%s) %s'%(st.st_gid,groupname(st.st_gid),t)
557
if st.st_mode & stat.S_ISVTX :
562
HELP = '''%s [options] [files...]
566
-?, --help -- this help
567
-m, --magic=<file> -- use this magic <file> instead of %s
568
-f, --files=<namefile> -- read filenames for <namefile>
569
* -C, --compile -- write "compiled" magic file
570
-b, --brief -- don't prepend filenames to output lines
571
+ -c, --check -- check the magic file
572
-i, --mime -- output MIME types
573
* -k, --keep-going -- don't stop st the first match
574
-n, --flush -- flush stdout after each line
575
-v, --verson -- print version and exit
576
* -z, --compressed -- try to look inside compressed files
577
-L, --follow -- follow symlinks
578
-s, --special -- don't skip special files
580
* -- not implemented so far ;-)
581
+ -- implemented, but in another way...
595
opts, args = getopt.getopt(
613
except getopt.error, why:
614
print >>sys.stderr, sys.argv[0], why
619
if o in ('-?','--help'):
625
elif o in ('-f','--files='):
627
elif o in ('-m','--magic='):
629
elif o in ('-C','--compile'):
631
elif o in ('-b','--brief'):
633
elif o in ('-c','--check'):
635
elif o in ('-i','--mime'):
637
if os.path.exists( _magic+'.mime' ) :
639
print >>sys.stderr,sys.argv[0]+':',\
640
"Using regular magic file `%s'" % _magic
641
elif o in ('-k','--keep-going'):
643
elif o in ('-n','--flush'):
645
elif o in ('-v','--version'):
648
elif o in ('-z','--compressed'):
650
elif o in ('-L','--follow'):
652
elif o in ('-s','--special'):
656
files = map(lambda x: x.strip(), v.split(','))
657
if '-' in files and '-' in args :
658
error( 1, 'cannot use STDIN simultaneously for file list and data' )
663
or open(file,'r',BUFFER_SIZE)
666
if name not in args :
669
if check : print >>sys.stderr, 'Loading magic database...'
671
m = MagicFile(_magic)
674
print >>sys.stderr, \
675
m.total_tests, 'tests loaded', \
676
'for', '%.2f' % (t1-t0), 'seconds'
677
print >>sys.stderr, len(m.tests), 'tests at top level'
678
return 0 # XXX "shortened" form ;-)
680
mlen = max( map(len, args) )+1
682
if not brief : print (arg + ':').ljust(mlen),
683
ftype = get_file_type( arg, follow )
684
if (special and ftype.find('special')>=0) \
685
or ftype[-4:] == 'file' :
689
except (IOError,os.error), why:
690
t = "can't read `%s' (%s)" % (why.filename,why.strerror)
691
if ftype[-4:] == 'file' : t = ftype[:-4] + t
693
print t and t or 'data'
695
'#\t%d tests ok, %d tests failed for %.2f seconds'%\
696
(m.ack_tests, m.nak_tests, t1-t0)
698
print mime and 'application/x-not-regular-file' or ftype
699
if flush : sys.stdout.flush()
700
# print >>sys.stderr, 'DONE'
709
if __name__ == '__main__' :