~dmedia/dmedia/trunk : contents of dmedia-extract at revision 939

~dmedia/dmedia/trunk : (revision 939)
#!/usr/bin/python3

# dmedia: distributed media library
# Copyright (C) 2011 Novacut Inc
#
# This file is part of `dmedia`.
#
# `dmedia` is free software: you can redistribute it and/or modify it under
# the terms of the GNU Affero General Public License as published by the Free
# Software Foundation, either version 3 of the License, or (at your option) any
# later version.
#
# `dmedia` is distributed in the hope that it will be useful, but WITHOUT ANY
# WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR
# A PARTICULAR PURPOSE.  See the GNU Affero General Public License for more
# details.
#
# You should have received a copy of the GNU Affero General Public License along
# with `dmedia`.  If not, see <http://www.gnu.org/licenses/>.
#
# Authors:
#   Jason Gerard DeRose <jderose@novacut.com>

"""
A quick and dirty GStreamer extractor.
"""

import sys
from os import path
import json
import optparse


# Command-line interface: exactly one positional FILENAME, plus an optional
# --full flag whose value is handed to Extractor at the bottom of the file.
parser = optparse.OptionParser()
parser.add_option('--full',
    help='do full play-through to guarantee duration in frames and samples',
    action='store_true',
    default=False,
)
(options, args) = parser.parse_args()

# Validate the arguments up front; on failure print a short message and exit
# with status 1.
if len(args) != 1:
    print('takes exactly 1 argument FILENAME')
    sys.exit(1)
filename = path.abspath(args[0])
if not path.isfile(filename):
    print('not a file: {!r}'.format(filename))
    sys.exit(1)


# NOTE(review): GStreamer is imported only after the command-line arguments
# have been validated -- presumably so usage errors are reported without
# loading gi/Gst; confirm before moving these imports to the top of the file.
import gi
gi.require_version('Gst', '1.0')
from gi.repository import GLib, Gst


# Initialize GStreamer before any of the code below creates elements.
Gst.init(None)


class FakeBin(Gst.Bin):
    """
    Base bin that inspects one decoded stream and discards its data.

    The bin chains ``queue ! <rate> ! fakesink`` and exposes the queue's sink
    pad through a ghost pad named ``'sink'``.  When caps show up on the
    queue's sink pad, the subclass's `_extract()` copies stream details into
    the shared *doc* dict, after which *callback* is called with this bin as
    its only argument.

    Subclasses must implement `_extract()` and `_finalize()`.

    :param callback: callable invoked as ``callback(fakebin)`` after caps
        have been handled
    :param doc: dict the subclass fills in with extracted metadata
    :param rate: factory name of the rate element, e.g. ``'videorate'``
    """

    def __init__(self, callback, doc, rate):
        super().__init__()
        self._callback = callback
        self._doc = doc
        self._q = Gst.ElementFactory.make('queue', None)
        self._rate = Gst.ElementFactory.make(rate, None)
        self._sink = Gst.ElementFactory.make('fakesink', None)
        self.add(self._q)
        self.add(self._rate)
        self.add(self._sink)
        self._q.link(self._rate)
        self._rate.link(self._sink)

        # Ghost Pads
        pad = self._q.get_static_pad('sink')
        self.add_pad(
            Gst.GhostPad.new('sink', pad)
        )
        pad.connect('notify::caps', self._on_notify_caps)

    def _query_duration(self, pad):
        """
        Return the duration reported by the peer of *pad*, or ``None``.

        The query is made in `Gst.Format.TIME`; ``None`` is returned when the
        pad has no peer, the query fails, or the answer is in another format.
        """
        peer = pad.get_peer()
        if peer is None:
            # Nothing is linked to this pad, so there is nobody to ask.
            return None
        query = Gst.Query.new_duration(Gst.Format.TIME)
        # FIXME: We get intermittent failures here with this exception:
        #     TypeError: argument query: Expected Gst.Query, but got gi.repository.Gst.Query
        # See LP:1069582
        #     https://bugs.launchpad.net/dmedia/+bug/1069582
        try:
            if peer.query(query):
                (fmt, duration) = query.parse_duration()
                if fmt == Gst.Format.TIME:
                    return duration
        except TypeError:
            pass
        return None

    def _on_notify_caps(self, pad, args):
        """
        Handle ``notify::caps``: extract metadata, then notify the owner.

        Does nothing while the pad still has no current caps.
        """
        caps = pad.get_current_caps()
        if not caps:
            return
        ns = self._query_duration(pad)
        self._extract(caps.get_structure(0), ns)
        self._callback(self)

    def _extract(self, d, ns):
        """
        Copy stream details from caps structure *d* into the doc.

        *ns* is the value returned by `_query_duration()` (may be ``None``).
        """
        raise NotImplementedError(
            '{}._extract()'.format(self.__class__.__name__)
        )

    def _finalize(self):
        """
        Record the rate element's counters in the doc.
        """
        raise NotImplementedError(
            '{}._finalize()'.format(self.__class__.__name__)
        )


class VideoBin(FakeBin):
    """
    `FakeBin` for a video stream, built around a ``videorate`` element.
    """

    def __init__(self, callback, doc):
        super().__init__(callback, doc, 'videorate')

    def _extract(self, d, ns):
        """
        Record framerate, frame size and, when known, duration in the doc.

        :param d: caps structure providing ``framerate``, ``width`` and
            ``height``
        :param ns: duration from `_query_duration()`; falsy when unknown
        """
        (_success, num, denom) = d.get_fraction('framerate')
        if num != 0:
            self._doc['framerate'] = {
                'num': num,
                'denom': denom,
            }
        self._doc['width'] = d.get_int('width')[1]
        self._doc['height'] = d.get_int('height')[1]
        if ns:
            self._doc['video_ns'] = ns
            # A zero denominator (presumably when the caps carry no usable
            # framerate) would raise ZeroDivisionError inside the signal
            # handler, so the frame count is simply left out in that case.
            if denom:
                frames = int(
                    round(ns * num / denom / float(Gst.SECOND))
                )
                self._doc['duration']['frames'] = frames
            # FIXME: Quick work-around for us not being able to thumbnail or
            # playthrough the last few frames on GStreamer 1.0 on Ubuntu Trusty:
            #self._doc['duration']['frames'] = max(1, frames - 15)

    def _finalize(self):
        """
        Store the ``videorate`` counters under ``doc['frames']``.

        ``consistent`` tells whether in + duplicate - drop equals out;
        ``matching`` tells whether in and out both equal the frame count
        derived from the duration (always False when no count was derived).
        """
        stats = {
            name: self._rate.get_property(name)
            for name in ('in', 'out', 'duplicate', 'drop')
        }
        stats['consistent'] = (
            stats['in'] + stats['duplicate'] - stats['drop'] == stats['out']
        )
        # Use .get() so that a missing frame count does not raise KeyError
        # and throw away the counters gathered above.
        expected = self._doc.get('duration', {}).get('frames')
        stats['matching'] = stats['in'] == stats['out'] == expected
        self._doc['frames'] = stats


class AudioBin(FakeBin):
    """
    `FakeBin` for an audio stream, built around an ``audiorate`` element.
    """

    def __init__(self, callback, doc):
        super().__init__(callback, doc, 'audiorate')

    def _extract(self, d, ns):
        """
        Record sample rate, channel count and, when known, duration.

        :param d: caps structure providing ``rate`` and ``channels``
        :param ns: duration from `_query_duration()`; falsy when unknown
        """
        samplerate = d.get_int('rate')[1]
        self._doc['samplerate'] = samplerate
        self._doc['channels'] = d.get_int('channels')[1]
        if ns:
            self._doc['audio_ns'] = ns
            self._doc['duration']['samples'] = samplerate * ns // Gst.SECOND
        # The callback is deliberately not invoked here:
        # FakeBin._on_notify_caps() calls it right after _extract() returns,
        # exactly as it does for VideoBin.

    def _finalize(self):
        """
        Store the ``audiorate`` counters under ``doc['samples']``.

        ``consistent`` tells whether in + add - drop equals out; ``matching``
        tells whether in and out both equal the sample count derived from
        the duration (always False when no count was derived).
        """
        stats = {
            name: self._rate.get_property(name)
            for name in ('in', 'out', 'add', 'drop')
        }
        stats['consistent'] = (
            stats['in'] + stats['add'] - stats['drop'] == stats['out']
        )
        # Use .get() so that a missing sample count does not raise KeyError
        # and throw away the counters gathered above.
        expected = self._doc.get('duration', {}).get('samples')
        stats['matching'] = stats['in'] == stats['out'] == expected
        self._doc['samples'] = stats


class Extractor(object):
    """
    Drive a ``filesrc ! decodebin`` pipeline and collect metadata in `doc`.

    Each decoded audio or video pad is linked to an `AudioBin` or `VideoBin`
    that fills in `doc`.  Unless *full* is true, the pipeline is stopped as
    soon as every linked bin has reported; otherwise it keeps running until
    end-of-stream, an error, or the timeout armed in `run()`.

    :param filename: path of the media file to inspect
    :param full: when true, do not stop early once all bins have reported
    """

    def __init__(self, filename, full):
        self.full = full
        self.doc = {'video_ns': 0, 'audio_ns': 0, 'duration': {}}
        self.mainloop = GLib.MainLoop()
        self.pipeline = Gst.Pipeline()

        # State used by the signal handlers; set up before any handler is
        # connected so a callback can never find an attribute missing.
        self.audio = None
        self.video = None
        self.need = None   # bins to wait for; known once no-more-pads fires
        self.have = set()  # bins that have already reported
        self._killed = False

        # Create bus and connect several handlers
        self.bus = self.pipeline.get_bus()
        self.bus.add_signal_watch()
        self.bus.connect('message::eos', self.on_eos)
        self.bus.connect('message::error', self.on_error)

        # Create elements
        self.src = Gst.ElementFactory.make('filesrc', None)
        self.dec = Gst.ElementFactory.make('decodebin', None)

        # Set properties
        self.src.set_property('location', filename)

        # Connect handlers for the 'pad-added' and 'no-more-pads' signals
        self.dec.connect('pad-added', self.on_pad_added)
        self.dec.connect('no-more-pads', self.on_no_more_pads)

        # The content type comes from the child of decodebin that is named
        # 'typefind'; skip it rather than crash if no such child exists.
        self.typefind = self.dec.get_by_name('typefind')
        if self.typefind is not None:
            self.typefind.connect('have-type', self.on_have_type)

        # Add elements to pipeline
        self.pipeline.add(self.src)
        self.pipeline.add(self.dec)

        # Link elements
        self.src.link(self.dec)

    def run(self):
        """
        Start the pipeline and block in the main loop until `kill()` runs.
        """
        self.pipeline.set_state(Gst.State.PLAYING)
        # NOTE(review): this 2 second timeout is armed even when self.full is
        # set, so a full play-through that takes longer is cut short --
        # confirm whether that is intended.
        GLib.timeout_add(2000, self.on_timeout)
        self.mainloop.run()

    def on_timeout(self):
        """
        Timeout handler: stop the extraction; the timeout does not repeat.
        """
        self.kill()
        return False

    def kill(self):
        """
        Stop the pipeline, finish `doc` and quit the main loop.

        Only the first call has any effect.  ``video_ns`` and ``audio_ns``
        are removed from `doc`; when a frame or sample count was recorded
        their maximum is stored as the overall duration and ``media`` is set
        to ``'video'`` or ``'audio'``.  Otherwise ``duration`` is dropped and
        a stream with a video bin is reported as ``'image'``.
        """
        if self._killed:
            return
        self._killed = True
        self.pipeline.set_state(Gst.State.NULL)
        ns = max(self.doc.pop('video_ns'), self.doc.pop('audio_ns'))
        if self.doc['duration']:
            self.doc['duration']['nanoseconds'] = ns
            self.doc['duration']['seconds'] = float(ns) / Gst.SECOND
            if self.video is not None:
                self.doc['media'] = 'video'
            elif self.audio is not None:
                self.doc['media'] = 'audio'
        else:
            del self.doc['duration']
            if self.video is not None:
                self.doc['media'] = 'image'
        self.mainloop.quit()

    def on_have_type(self, element, prop, caps):
        """
        Store the part of the caps string before the first comma.
        """
        self.doc['content_type'] = caps.to_string().split(',')[0]

    def link_pad(self, pad, name):
        """
        Create the bin for *name*, add it, link *pad* to it and start it.

        :param pad: decoded source pad to link
        :param name: ``'audio'`` or ``'video'``
        :returns: the new `AudioBin` or `VideoBin`
        """
        cls = {'audio': AudioBin, 'video': VideoBin}[name]
        fakebin = cls(self.on_callback, self.doc)
        self.pipeline.add(fakebin)
        pad.link(fakebin.get_static_pad('sink'))
        fakebin.set_state(Gst.State.PLAYING)
        return fakebin

    def on_pad_added(self, element, pad):
        """
        Link the first audio pad and the first video pad that appear.

        Any further audio or video pads, and pads of any other kind, are
        left unlinked.
        """
        caps = pad.get_current_caps()
        if caps is None:
            # No caps set on the pad yet; ask what it is able to produce.
            caps = pad.query_caps(None)
        string = caps.to_string()
        if string.startswith('audio/'):
            if self.audio is None:
                self.audio = self.link_pad(pad, 'audio')
        elif string.startswith('video/'):
            if self.video is None:
                self.video = self.link_pad(pad, 'video')

    def on_no_more_pads(self, element):
        """
        Record which bins must report before the extraction may stop early.
        """
        self.need = set(
            fakebin for fakebin in (self.audio, self.video)
            if fakebin is not None
        )
        # Bins that reported before this signal stay in self.have; if they
        # already cover everything there is no later callback to wait for.
        self._maybe_finish()

    def on_callback(self, fakebin):
        """
        Note that *fakebin* has reported and stop early when all have.
        """
        self.have.add(fakebin)
        self._maybe_finish()

    def _maybe_finish(self):
        """
        Schedule `kill()` once every needed bin has reported.

        Does nothing in full mode, or while the set of needed bins is still
        unknown or empty.
        """
        if self.full or not self.need:
            return
        if self.have == self.need:
            GLib.idle_add(self.kill)

    def on_eos(self, bus, msg):
        """
        End-of-stream handler: gather rate counters, then stop.
        """
        for fakebin in (self.audio, self.video):
            if fakebin is not None:
                try:
                    fakebin._finalize()
                except Exception as e:
                    # Best effort: the counters are optional, so report the
                    # problem on stderr and still emit the rest of the doc.
                    print(
                        'could not finalize {!r}: {!r}'.format(fakebin, e),
                        file=sys.stderr,
                    )
        self.kill()

    def on_error(self, bus, msg):
        """
        Bus error handler: stop the pipeline and exit with status 2.
        """
        #error = msg.parse_error()[1]
        self.kill()
        sys.exit(2)


# Run the extraction, then write the collected metadata to stdout as JSON.
extractor = Extractor(filename, options.full)
extractor.run()
report = json.dumps(extractor.doc, sort_keys=True, indent=4)
print(report)