2
# $Id: mpeginfo.py 398 2005-02-15 18:52:51Z dischi $
4
# Revision 1.33 2005/02/15 18:52:51 dischi
5
# some strange bugfix (what is this doing?)
7
# Revision 1.32 2005/01/21 16:37:02 dischi
8
# try to find bad timestamps
10
# Revision 1.31 2005/01/08 12:06:45 dischi
11
# make sure the buffer is big enough
13
# Revision 1.30 2005/01/02 14:57:27 dischi
14
# detect ac3 in normal mpeg2
16
# Revision 1.29 2004/11/27 14:42:12 dischi
17
# remove future warning
19
# Revision 1.28 2004/11/15 21:43:36 dischi
20
# remove bad debugging stuff
22
# Revision 1.27 2004/11/12 18:10:45 dischi
23
# add ac3 support in mpeg streams
25
# Revision 1.26 2004/10/04 18:06:54 dischi
26
# test length of remaining buffer
28
# Revision 1.25 2004/07/11 19:37:25 dischi
29
# o read more bytes on ts scan
30
# o support for AC3 in private streams
32
# Revision 1.24 2004/07/03 09:01:32 dischi
33
# o fix PES start detection inside TS
34
# o try to find out if the stream is progressive or interlaced
36
# Revision 1.23 2004/06/23 19:44:10 dischi
37
# better length detection, big cleanup
39
# Revision 1.22 2004/06/22 21:37:34 dischi
41
# o basic length detection for TS and PES
43
# Revision 1.21 2004/06/21 20:37:34 dischi
44
# basic support for mpeg-ts
46
# Revision 1.20 2004/03/13 23:41:59 dischi
47
# add AudioInfo to mpeg for all streams
49
# Revision 1.19 2004/02/11 20:11:54 dischi
50
# Updated length calculation for mpeg files. This may not work for all files.
53
# MMPython - Media Metadata for Python
54
# Copyright (C) 2003 Thomas Schueppel
56
# This program is free software; you can redistribute it and/or modify
57
# it under the terms of the GNU General Public License as published by
58
# the Free Software Foundation; either version 3 of the License, or
59
# (at your option) any later version.
61
# This program is distributed in the hope that it will be useful, but
62
# WITHOUT ANY WARRANTY; without even the implied warranty of MER-
63
# CHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
64
# Public License for more details.
66
# You should have received a copy of the GNU General Public License along
67
# with this program; if not, write to the Free Software Foundation, Inc.,
68
# 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
70
# -----------------------------------------------------------------------
79
from modules.mmpython import mediainfo
83
#------------------------------------------------------------------------
86
# Start Codes, with 'slice' occupying 0x01..0xAF
87
#------------------------------------------------------------------------
89
0x00 : 'picture_start_code',
92
0xB2 : 'user_data_start_code',
93
0xB3 : 'sequence_header_code',
94
0xB4 : 'sequence_error_code',
95
0xB5 : 'extension_start_code',
97
0xB7 : 'sequence end',
98
0xB8 : 'group of pictures',
100
# Slice start codes occupy 0x01..0xAF *inclusive*; range() excludes its
# stop value, so the upper bound has to be 0xAF + 1 to register 0xAF too.
for i in range(0x01, 0xAF + 1):
    START_CODE[i] = 'slice_start_code'
103
#------------------------------------------------------------------------
105
#------------------------------------------------------------------------
114
SEQ_START_CODE = 0xB3
120
PRIVATE_STREAM1 = 0xBD
121
PRIVATE_STREAM2 = 0xBf
123
TS_PACKET_LENGTH = 188
126
#------------------------------------------------------------------------
129
# A lookup table of all the standard frame rates. Some rates adhere to
130
# a particular profile that ensures compatibility with VLSI capabilities
131
# of the early to mid 1990s.
134
# Constrained Parameters Bitstreams, an MPEG-1 set of sampling and
135
# bitstream parameters designed to normalize decoder computational
136
# complexity, buffer size, and memory bandwidth while still addressing
137
# the widest possible range of applications.
140
# MPEG-2 Video Main Profile and Main Level is analogous to MPEG-1's
141
# CPB, with sampling limits at CCIR 601 parameters (720x480x30 Hz or
144
#------------------------------------------------------------------------
147
round(24000.0/1001*100)/100, # 3-2 pulldown NTSC (CPB/Main Level)
148
24, # Film (CPB/Main Level)
149
25, # PAL/SECAM or 625/60 video
150
round(30000.0/1001*100)/100, # NTSC (CPB/Main Level)
151
30, # drop-frame NTSC or component 525/60 (CPB/Main Level)
152
50, # double-rate PAL
153
round(60000.0/1001*100)/100, # double-rate NTSC
154
60, # double-rate, drop-frame NTSC/component 525/60 video
157
#------------------------------------------------------------------------
158
# ASPECT_RATIO -- INCOMPLETE?
160
# This lookup table maps the header aspect ratio index to a common name.
161
# These are just the defined ratios for CPB I believe. As I understand
162
# it, a stream that doesn't adhere to one of these aspect ratios is
163
# technically considered non-compliant.
164
#------------------------------------------------------------------------
165
ASPECT_RATIO = [ 'Forbidden',
173
class MpegInfo(mediainfo.AVInfo):
174
def __init__(self,file):
175
mediainfo.AVInfo.__init__(self)
176
self.context = 'video'
177
self.sequence_header_offset = 0
179
# detect TS (fast scan)
180
self.valid = self.isTS(file)
183
# detect system mpeg (many infos)
184
self.valid = self.isMPEG(file)
188
self.valid = self.isPES(file)
191
self.mime = 'video/mpeg'
193
self.video.append(mediainfo.VideoInfo())
195
if self.sequence_header_offset <= 0:
198
self.progressive(file)
200
for vi in self.video:
201
vi.width, vi.height = self.dxy(file)
202
vi.fps, vi.aspect = self.framerate_aspect(file)
203
vi.bitrate = self.bitrate(file)
205
vi.length = self.length
208
if self.video[0].width == 480:
209
self.type = 'MPEG2 video' # SVCD spec
210
elif self.video[0].width == 352:
211
self.type = 'MPEG1 video' # VCD spec
213
self.type = 'MPEG video'
215
if mediainfo.DEBUG > 2:
221
get width and height of the video
223
file.seek(self.sequence_header_offset+4,0)
225
x = struct.unpack('>H',v[:2])[0] >> 4
226
y = struct.unpack('>H',v[1:3])[0] & 0x0FFF
230
def framerate_aspect(self,file):
232
read framerate and aspect ratio
234
file.seek(self.sequence_header_offset+7,0)
235
v = struct.unpack( '>B', file.read(1) )[0]
237
fps = FRAME_RATE[v&0xf]
241
aspect = ASPECT_RATIO[v>>4]
244
print 'Index error: %s' % (v>>4)
249
def progressive(self, file):
251
Try to find out with brute force if the mpeg is interlaced or not.
252
Search for the Sequence_Extension in the extension header (01B5)
258
if len(buffer) < 1000:
262
buffer += file.read(1024)
263
if len(buffer) < 1000:
265
pos = buffer.find('\x00\x00\x01\xb5')
266
if pos == -1 or len(buffer) - pos < 5:
267
buffer = buffer[-10:]
269
ext = (ord(buffer[pos+4]) >> 4)
273
if (ord(buffer[pos+5]) >> 3) & 1:
274
self.keys.append('progressive')
277
self.keys.append('interlaced')
282
buffer = buffer[pos+4:]
286
#------------------------------------------------------------------------
289
# From the MPEG-2.2 spec:
291
# bit_rate -- This is a 30-bit integer. The lower 18 bits of the
292
# integer are in bit_rate_value and the upper 12 bits are in
293
# bit_rate_extension. The 30-bit integer specifies the bitrate of the
294
# bitstream measured in units of 400 bits/second, rounded upwards.
295
# The value zero is forbidden.
297
# So ignoring all the variable bitrate stuff for now, this 30 bit integer
298
# multiplied times 400 bits/sec should give the rate in bits/sec.
300
# TODO: Variable bitrates? I need one that implements this.
302
# Continued from the MPEG-2.2 spec:
304
# If the bitstream is a constant bitrate stream, the bitrate specified
305
# is the actual rate of operation of the VBV specified in annex C. If
306
# the bitstream is a variable bitrate stream, the STD specifications in
307
# ISO/IEC 13818-1 supersede the VBV, and the bitrate specified here is
308
# used to dimension the transport stream STD (2.4.2 in ITU-T Rec. xxx |
309
# ISO/IEC 13818-1), or the program stream STD (2.4.5 in ITU-T Rec. xxx |
312
# If the bitstream is not a constant rate bitstream the vbv_delay
313
# field shall have the value FFFF in hexadecimal.
315
# Given the value encoded in the bitrate field, the bitstream shall be
316
# generated so that the video encoding and the worst case multiplex
317
# jitter do not cause STD buffer overflow or underflow.
320
#------------------------------------------------------------------------
323
# Some parts in the code are based on mpgtx (mpgtx.sf.net)
325
def bitrate(self,file):
327
read the bitrate (most of the time broken)
329
file.seek(self.sequence_header_offset+8,0)
330
t,b = struct.unpack( '>HB', file.read(3) )
331
vrate = t << 2 | b >> 6
335
def ReadSCRMpeg2(self, buffer):
    """
    read SCR (timestamp) for MPEG2 at the buffer beginning (6 Bytes)

    Only the 33-bit SCR base is decoded; it is carried in the first five
    bytes, split into three groups separated by marker bits.  The SCR
    extension stored in bytes 4-5 is not decoded because it does not
    contribute to a result expressed in whole seconds.

    buffer -- byte string positioned on the first SCR byte
    Returns the SCR base divided by 90000 (integer seconds).
    Raises IndexError if fewer than five bytes are available.
    """
    # bytearray() yields integers for a Python 2 str as well as for
    # Python 3 bytes, so no per-byte ord() call is needed
    data = bytearray(buffer[:5])

    # bit 32 of the SCR base
    highbit = (data[0] & 0x20) >> 5

    # bits 31..0 of the SCR base
    low4Bytes = ((data[0] & 0x18) >> 3) << 30
    low4Bytes |= (data[0] & 0x03) << 28
    low4Bytes |= data[1] << 20
    low4Bytes |= (data[2] & 0xF8) << 12
    low4Bytes |= (data[2] & 0x03) << 13
    low4Bytes |= data[3] << 5
    low4Bytes |= data[4] >> 3

    # floor division keeps the result an integer even with true division
    return ((highbit << 32) + low4Bytes) // 90000
355
def ReadSCRMpeg1(self, buffer):
    """
    read SCR (timestamp) for MPEG1 at the buffer beginning (5 Bytes)

    The 33-bit SCR is spread over five bytes; the lowest bit of each
    byte is dropped by the shifts below.

    buffer -- byte string positioned on the first SCR byte
    Returns the SCR divided by 90000 (integer seconds).
    Raises IndexError if fewer than five bytes are available.
    """
    # bytearray() yields integers for a Python 2 str as well as for
    # Python 3 bytes, so no per-byte ord() call is needed
    data = bytearray(buffer[:5])

    # bit 32 of the SCR
    highbit = (data[0] >> 3) & 0x01

    # bits 31..0 of the SCR
    low4Bytes = ((data[0] >> 1) & 0x03) << 30
    low4Bytes |= data[1] << 22
    low4Bytes |= (data[2] >> 1) << 15
    low4Bytes |= data[3] << 7
    low4Bytes |= data[4] >> 1

    # floor division keeps the result an integer even with true division
    return ((highbit << 32) + low4Bytes) // 90000
370
def ReadPTS(self, buffer):
    """
    read PTS (PES timestamp) at the buffer beginning (5 Bytes)

    The 33-bit value is stored as a 3-bit, a 15-bit and another 15-bit
    group; the lowest bit of bytes 0, 2 and 4 is dropped by the shifts
    below.

    buffer -- byte string positioned on the first PTS byte
    Returns the PTS divided by 90000 (integer seconds).
    Raises IndexError if fewer than five bytes are available.
    """
    # bytearray() yields integers for a Python 2 str as well as for
    # Python 3 bytes, so no per-byte ord() call is needed
    data = bytearray(buffer[:5])

    high = (data[0] & 0xF) >> 1              # bits 32..30
    med = (data[1] << 7) + (data[2] >> 1)    # bits 29..15
    low = (data[3] << 7) + (data[4] >> 1)    # bits 14..0

    # floor division keeps the result an integer even with true division
    return ((high << 30) + (med << 15) + low) // 90000
380
def ReadHeader(self, buffer, offset):
382
Handle MPEG header in buffer on position offset
383
Return -1 on error, new offset or 0 if the new offset can't be scanned
385
if buffer[offset:offset+3] != '\x00\x00\x01':
388
id = ord(buffer[offset+3])
390
if id == PADDING_PKT:
391
return offset + (ord(buffer[offset+4]) << 8) + ord(buffer[offset+5]) + 6
394
if ord(buffer[offset+4]) & 0xF0 == 0x20:
395
self.type = 'MPEG1 video'
396
self.get_time = self.ReadSCRMpeg1
398
elif (ord(buffer[offset+4]) & 0xC0) == 0x40:
399
self.type = 'MPEG2 video'
400
self.get_time = self.ReadSCRMpeg2
401
return offset + (ord(buffer[offset+13]) & 0x07) + 14
406
if 0xC0 <= id <= 0xDF:
407
# code for audio stream
412
self.audio.append(mediainfo.AudioInfo())
413
self.audio[-1].id = id
414
self.audio[-1].keys.append('id')
417
if 0xE0 <= id <= 0xEF:
418
# code for video stream
423
self.video.append(mediainfo.VideoInfo())
424
self.video[-1].id = id
425
self.video[-1].keys.append('id')
429
# sequence header, remember that position for later use
430
self.sequence_header_offset = offset
433
if id in (PRIVATE_STREAM1, PRIVATE_STREAM2):
434
# private stream. we don't know, but maybe we can guess later
435
add = ord(buffer[offset+8])
436
# if (ord(buffer[offset+6]) & 4) or 1:
437
# id = ord(buffer[offset+10+add])
438
if buffer[offset+11+add:offset+15+add].find('\x0b\x77') != -1:
444
self.audio.append(mediainfo.AudioInfo())
445
self.audio[-1].id = id
446
self.audio[-1].codec = 'AC3'
447
self.audio[-1].keys.append('id')
459
# Normal MPEG (VCD, SVCD) ========================================
461
def isMPEG(self, file):
463
This MPEG starts with a sequence of 0x00 followed by a PACK Header
464
http://dvd.sourceforge.net/dvdinfo/packhdr.html
467
buffer = file.read(10000)
470
# skip the leading run of 0 bytes
471
while buffer[offset] == '\0':
475
# test for mpeg header 0x00 0x00 0x01
476
if not buffer[offset:offset+4] == '\x00\x00\x01%s' % chr(PACK_PKT):
479
# scan the 100000 bytes of data
480
buffer += file.read(100000)
482
# scan first header, to get basic info about
483
# how to read a timestamp
484
self.ReadHeader(buffer, offset)
486
# store first timestamp
487
self.start = self.get_time(buffer[offset+4:])
488
while len(buffer) > offset + 1000 and buffer[offset:offset+3] == '\x00\x00\x01':
489
# read the mpeg header
490
new_offset = self.ReadHeader(buffer, offset)
492
# header scanning detected error, this is no mpeg
497
# we have a new offset
500
# skip padding 0 before a new header
501
while len(buffer) > offset + 10 and \
502
not ord(buffer[offset+2]):
506
# seek to new header by brute force
507
offset += buffer[offset+4:].find('\x00\x00\x01') + 4
509
# fill in values for support functions:
510
self.__seek_size__ = 1000000
511
self.__sample_size__ = 10000
512
self.__search__ = self._find_timer_
513
self.filename = file.name
515
# get length of the file
516
self.length = self.get_length()
520
def _find_timer_(self, buffer):
522
Return position of timer in buffer or -1 if not found.
523
This function is valid for 'normal' mpeg files
525
pos = buffer.find('\x00\x00\x01%s' % chr(PACK_PKT))
532
# PES ============================================================
535
def ReadPESHeader(self, offset, buffer, id=0):
538
Since it starts with 0x00 0x00 0x01 like 'normal' mpegs, this
539
function will return (0, -1) when it is no PES header or
540
(packet length, timestamp position (maybe -1))
542
http://dvd.sourceforge.net/dvdinfo/pes-hdr.html
544
if not buffer[0:3] == '\x00\x00\x01':
547
packet_length = (ord(buffer[4]) << 8) + ord(buffer[5]) + 6
548
align = ord(buffer[6]) & 4
549
header_length = ord(buffer[8])
551
# PES ID (starting with 001)
552
if ord(buffer[3]) & 0xE0 == 0xC0:
553
id = id or ord(buffer[3]) & 0x1F
558
self.audio.append(mediainfo.AudioInfo())
559
self.audio[-1].id = id
560
self.audio[-1].keys.append('id')
562
elif ord(buffer[3]) & 0xF0 == 0xE0:
563
id = id or ord(buffer[3]) & 0xF
568
self.video.append(mediainfo.VideoInfo())
569
self.video[-1].id = id
570
self.video[-1].keys.append('id')
573
if buffer[header_length+9:header_length+13] == \
574
'\x00\x00\x01\xB3' and not self.sequence_header_offset:
575
# yes, remember offset for later use
576
self.sequence_header_offset = offset + header_length+9
577
elif ord(buffer[3]) == 189 or ord(buffer[3]) == 191:
578
# private stream. we don't know, but maybe we can guess later
579
id = id or ord(buffer[3]) & 0xF
580
if align and buffer[header_length+9:header_length+11] == '\x0b\x77':
586
self.audio.append(mediainfo.AudioInfo())
587
self.audio[-1].id = id
588
self.audio[-1].codec = 'AC3'
589
self.audio[-1].keys.append('id')
595
ptsdts = ord(buffer[7]) >> 6
597
if ptsdts and ptsdts == ord(buffer[9]) >> 4:
598
if ord(buffer[9]) >> 4 != ptsdts:
599
print 'WARNING: bad PTS/DTS, please contact us'
600
return packet_length, -1
602
# timestamp = self.ReadPTS(buffer[9:14])
603
high = ((ord(buffer[9]) & 0xF) >> 1)
604
med = (ord(buffer[10]) << 7) + (ord(buffer[11]) >> 1)
605
low = (ord(buffer[12]) << 7) + (ord(buffer[13]) >> 1)
606
return packet_length, 9
608
return packet_length, -1
612
def isPES(self, file):
614
print 'trying mpeg-pes scan'
616
buffer = file.read(3)
618
# header (also valid for all mpegs)
619
if not buffer == '\x00\x00\x01':
622
self.sequence_header_offset = 0
623
buffer += file.read(10000)
626
while offset + 1000 < len(buffer):
627
pos, timestamp = self.ReadPESHeader(offset, buffer[offset:])
630
if timestamp != -1 and not hasattr(self, 'start'):
631
self.get_time = self.ReadPTS
632
self.start = self.get_time(buffer[offset+timestamp:offset+timestamp+5])
633
if self.sequence_header_offset and hasattr(self, 'start'):
634
# we have all the information we need
638
if offset + 1000 < len(buffer) and len(buffer) < 1000000 or 1:
639
# looks like a pes, read more
640
buffer += file.read(10000)
642
if not self.video and not self.audio:
643
# no video and no audio?
646
self.type = 'MPEG-PES'
648
# fill in values for support functions:
649
self.__seek_size__ = 10000000 # 10 MB
650
self.__sample_size__ = 500000 # 500 k scanning
651
self.__search__ = self._find_timer_PES_
652
self.filename = file.name
654
# get length of the file
655
self.length = self.get_length()
659
def _find_timer_PES_(self, buffer):
661
Return position of timer in buffer or -1 if not found.
662
This function is valid for PES files
664
pos = buffer.find('\x00\x00\x01')
666
if pos == -1 or offset + 1000 >= len(buffer):
671
while offset + 1000 < len(buffer):
672
pos, timestamp = self.ReadPESHeader(offset, buffer[offset:])
673
if timestamp != -1 and retpos == -1:
674
retpos = offset + timestamp
676
# Oops, that was an MPEG header, not a PES header
677
offset += buffer[offset:].find('\x00\x00\x01')
690
# Transport Stream ===============================================
692
def isTS(self, file):
695
buffer = file.read(TS_PACKET_LENGTH * 2)
698
while c + TS_PACKET_LENGTH < len(buffer):
699
if ord(buffer[c]) == ord(buffer[c+TS_PACKET_LENGTH]) == TS_SYNC:
705
buffer += file.read(10000)
706
self.type = 'MPEG-TS'
708
while c + TS_PACKET_LENGTH < len(buffer):
709
start = ord(buffer[c+1]) & 0x40
710
# maybe load more into the buffer
711
if c + 2 * TS_PACKET_LENGTH > len(buffer) and c < 500000:
712
buffer += file.read(10000)
714
# wait until the ts payload contains a payload header
716
c += TS_PACKET_LENGTH
719
tsid = ((ord(buffer[c+1]) & 0x3F) << 8) + ord(buffer[c+2])
720
adapt = (ord(buffer[c+3]) & 0x30) >> 4
724
# meta info present, skip it for now
725
adapt_len = ord(buffer[c+offset])
726
offset += adapt_len + 1
728
if not ord(buffer[c+1]) & 0x40:
729
# no new pes or psi in stream payload starting
733
timestamp = self.ReadPESHeader(c+offset, buffer[c+offset:], tsid)[1]
735
if not hasattr(self, 'start'):
736
self.get_time = self.ReadPTS
737
timestamp = c + offset + timestamp
738
self.start = self.get_time(buffer[timestamp:timestamp+5])
739
elif not hasattr(self, 'audio_ok'):
740
timestamp = c + offset + timestamp
741
start = self.get_time(buffer[timestamp:timestamp+5])
742
if abs(start - self.start) < 10:
749
print 'Timestamp error, correcting'
751
if hasattr(self, 'start') and self.start and \
752
self.sequence_header_offset and self.video and self.audio:
755
c += TS_PACKET_LENGTH
758
if not self.sequence_header_offset:
761
if hasattr(self, 'start') and self.start:
762
self.keys.append('start')
764
# fill in values for support functions:
765
self.__seek_size__ = 10000000 # 10 MB
766
self.__sample_size__ = 100000 # 100 k scanning
767
self.__search__ = self._find_timer_TS_
768
self.filename = file.name
770
# get length of the file
771
self.length = self.get_length()
775
def _find_timer_TS_(self, buffer):
778
while c + TS_PACKET_LENGTH < len(buffer):
779
if ord(buffer[c]) == ord(buffer[c+TS_PACKET_LENGTH]) == TS_SYNC:
785
while c + TS_PACKET_LENGTH < len(buffer):
786
start = ord(buffer[c+1]) & 0x40
788
c += TS_PACKET_LENGTH
791
tsid = ((ord(buffer[c+1]) & 0x3F) << 8) + ord(buffer[c+2])
792
adapt = (ord(buffer[c+3]) & 0x30) >> 4
796
# meta info present, skip it for now
797
offset += ord(buffer[c+offset]) + 1
800
timestamp = self.ReadPESHeader(c+offset, buffer[c+offset:], tsid)[1]
801
return c + offset + timestamp
802
c += TS_PACKET_LENGTH
807
# Support functions ==============================================
809
def get_endpos(self):
811
get the last timestamp of the mpeg, return -1 if this is not possible
813
if not hasattr(self, 'filename') or not hasattr(self, 'start'):
816
file = open(self.filename)
817
file.seek(os.stat(self.filename)[stat.ST_SIZE]-self.__sample_size__)
818
buffer = file.read(self.__sample_size__)
822
pos = self.__search__(buffer)
825
end = self.get_time(buffer[pos:])
826
buffer = buffer[pos+100:]
832
def get_length(self):
834
get the length in seconds, return -1 if this is not possible
836
end = self.get_endpos()
840
return int(((long(1) << 33) - 1 ) / 90000) - self.start + end
841
return end - self.start
844
def seek(self, end_time):
846
Return the byte position in the file where the time position
847
is 'pos' seconds. Return 0 if this is not possible
849
if not hasattr(self, 'filename') or not hasattr(self, 'start'):
852
file = open(self.filename)
856
file.seek(self.__seek_size__, 1)
857
buffer = file.read(self.__sample_size__)
858
if len(buffer) < 10000:
860
pos = self.__search__(buffer)
863
if self.get_time(buffer[pos:]) >= end_time:
867
seek_to = file.tell()
875
scan file for timestamps (may take a long time)
877
if not hasattr(self, 'filename') or not hasattr(self, 'start'):
879
file = open(self.filename)
880
print 'scanning file...'
882
file.seek(self.__seek_size__ * 10, 1)
883
buffer = file.read(self.__sample_size__)
884
if len(buffer) < 10000:
886
pos = self.__search__(buffer)
889
print self.get_time(buffer[pos:])
897
#mmpython.registertype( 'video/mpeg', ('mpeg','mpg','mp4', 'ts'), mediainfo.TYPE_AV, MpegInfo )