~davewalker/etherpad/ubuntu-unlimited-max-users-and-revisions

« back to all changes in this revision

Viewing changes to bin/DocumentConverter.py

  • Committer: James Page
  • Date: 2011-04-13 08:00:43 UTC
  • Revision ID: james.page@canonical.com-20110413080043-eee2nq7y1v7cv2mp
* Refactoring to use native Ubuntu Java libraries. 
* debian/control:
  - use openjdk instead of sun's java
  - update maintainer
* debian/etherpad.init.orig, debian/etherpad.upstart:
  - move the init script out of the way
  - create a basic upstart script
  - note that the open office document conversion daemon was dropped
    from the upstart configuration; if this behavior is desired, please
    create a separate upstart job for it
* debian/rules:
  - just use basic dh_installinit, as it will pick up the new upstart job
* New release
* Changed maintainer to Packaging
* Fixed installation scripts
* Initial Release.

Show diffs side-by-side

added added

removed removed

Lines of Context:
 
1
#!/usr/bin/python
 
2
#
 
3
# PyODConverter (Python OpenDocument Converter) v1.1 - 2009-11-14
 
4
# Modifications by Mikko Rantalainen <mikko.rantalainen@peda.net>
 
5
#
 
6
# This script converts a document from one office format to another by
 
7
# connecting to an OpenOffice.org instance via Python-UNO bridge.
 
8
#
 
9
# Copyright (C) 2008-2009 Mirko Nasato <mirko@artofsolving.com>
 
10
# Licensed under the GNU LGPL v2.1 - http://www.gnu.org/licenses/lgpl-2.1.html
 
11
# - or any later version.
 
12
#
 
13
# See also:
 
14
# http://www.artofsolving.com/opensource/pyodconverter
 
15
# http://www.linuxjournal.com/content/starting-stopping-and-connecting-openoffice-python
 
16
#
 
17
 
 
18
# Port used when the caller does not specify one.
DEFAULT_OPENOFFICE_PORT = 8100

import sys
import os
import time

# Find OpenOffice.
# Candidate installations, probed in order.  Each entry is a pair:
#   (directory holding the soffice binary, directory added to sys.path
#    so that the "uno" module can be found).
_oopaths = (
    ('/usr/lib/openoffice/program', '/usr/lib/openoffice/program'),
    ('/usr/lib64/ooo-2.0/program', '/usr/lib64/ooo-2.0/program'),
    ('/opt/openoffice.org3/program', '/opt/openoffice.org/basis3.0/program'),
)

# Bind the names up front so they always exist.  Without these defaults a
# machine with none of the candidate directories would only fail much later,
# with a NameError at the first use of one of these names.
OPENOFFICE_PATH = None
OPENOFFICE_BIN = None
OPENOFFICE_LIBPATH = None

for _program_dir, _lib_dir in _oopaths:
    if not os.path.exists(_program_dir):
        continue

    OPENOFFICE_PATH = _program_dir
    OPENOFFICE_BIN = os.path.join(OPENOFFICE_PATH, 'soffice')
    OPENOFFICE_LIBPATH = _lib_dir

    # Add to path so we can find uno.
    if OPENOFFICE_LIBPATH not in sys.path:
        sys.path.insert(0, OPENOFFICE_LIBPATH)
    break
 
41
 
 
42
import uno
 
43
from os.path import abspath, isfile, splitext
 
44
from com.sun.star.beans import PropertyValue
 
45
from com.sun.star.task import ErrorCodeIOException
 
46
from com.sun.star.connection import NoConnectException
 
47
 
 
48
# Document families.  DocumentConverter._detectFamily() returns one of these,
# and they are the second-level keys of EXPORT_FILTER_MAP.
FAMILY_TEXT = "Text"
FAMILY_WEB = "Web"
FAMILY_SPREADSHEET = "Spreadsheet"
FAMILY_PRESENTATION = "Presentation"
FAMILY_DRAWING = "Drawing"

#---------------------#
# Configuration Start #
#---------------------#

# see http://wiki.services.openoffice.org/wiki/Framework/Article/Filter

# Load properties keyed by input file extension.
# most formats are auto-detected; only those requiring options are defined here
IMPORT_FILTER_MAP = {
    "txt": {
        "FilterName": "Text (encoded)",
        "FilterOptions": "utf8",
    },
    "csv": {
        "FilterName": "Text - txt - csv (StarCalc)",
        "FilterOptions": "44,34,0",
    },
}

# Store properties keyed first by output file extension, then by the family
# of the loaded document.  A missing extension means "unknown output format";
# a missing family under a known extension means "unsupported conversion".
EXPORT_FILTER_MAP = {
    "pdf": {
        FAMILY_TEXT: {"FilterName": "writer_pdf_Export"},
        FAMILY_WEB: {"FilterName": "writer_web_pdf_Export"},
        FAMILY_SPREADSHEET: {"FilterName": "calc_pdf_Export"},
        FAMILY_PRESENTATION: {"FilterName": "impress_pdf_Export"},
        FAMILY_DRAWING: {"FilterName": "draw_pdf_Export"},
    },
    "html": {
        FAMILY_TEXT: {"FilterName": "HTML (StarWriter)"},
        FAMILY_SPREADSHEET: {"FilterName": "HTML (StarCalc)"},
        FAMILY_PRESENTATION: {"FilterName": "impress_html_Export"},
    },
    "odt": {
        FAMILY_TEXT: {"FilterName": "writer8"},
        FAMILY_WEB: {"FilterName": "writerweb8_writer"},
    },
    "doc": {
        FAMILY_TEXT: {"FilterName": "MS Word 97"},
    },
    "rtf": {
        FAMILY_TEXT: {"FilterName": "Rich Text Format"},
    },
    "txt": {
        FAMILY_TEXT: {
            "FilterName": "Text",
            "FilterOptions": "utf8",
        },
    },
    "ods": {
        FAMILY_SPREADSHEET: {"FilterName": "calc8"},
    },
    "xls": {
        FAMILY_SPREADSHEET: {"FilterName": "MS Excel 97"},
    },
    "csv": {
        FAMILY_SPREADSHEET: {
            "FilterName": "Text - txt - csv (StarCalc)",
            "FilterOptions": "44,34,0",
        },
    },
    "odp": {
        FAMILY_PRESENTATION: {"FilterName": "impress8"},
    },
    "ppt": {
        FAMILY_PRESENTATION: {"FilterName": "MS PowerPoint 97"},
    },
    "swf": {
        FAMILY_DRAWING: {"FilterName": "draw_flash_Export"},
        FAMILY_PRESENTATION: {"FilterName": "impress_flash_Export"},
    },
}

# Page-style properties applied to every page style of a loaded document,
# keyed by document family, before it is exported.
PAGE_STYLE_OVERRIDE_PROPERTIES = {
    FAMILY_SPREADSHEET: {
        #--- Scale options: uncomment 1 of the 3 ---
        # a) 'Reduce / enlarge printout': 'Scaling factor'
        "PageScale": 100,
        # b) 'Fit print range(s) to width / height': 'Width in pages' and 'Height in pages'
        #"ScaleToPagesX": 1, "ScaleToPagesY": 1000,
        # c) 'Fit print range(s) on number of pages': 'Fit print range(s) on number of pages'
        #"ScaleToPages": 1,
        "PrintGrid": False,
    },
}
 
137
 
 
138
#-------------------#
 
139
# Configuration End #
 
140
#-------------------#
 
141
 
 
142
class OOService:
    """
    Start, stop, and connect to OpenOffice.
    """

    # connect() tries this many times, pausing this many seconds between tries.
    CONNECT_ATTEMPTS = 6
    CONNECT_RETRY_DELAY = 1

    def __init__(self, port=DEFAULT_OPENOFFICE_PORT):
        """ Create a service handle for the OpenOffice instance on the specified port. """
        self.port = port

    def connect(self, no_startup=False):
        """
        Connect to OpenOffice and return its desktop object.

        If the first connection attempt fails, OpenOffice is started via
        startup() (unless no_startup is true, in which case no further
        attempts are made) and the connection is retried.

        Raises Exception if no connection could be established or the
        desktop service could not be created.
        """
        localContext = uno.getComponentContext()
        resolver = localContext.ServiceManager.createInstanceWithContext(
            "com.sun.star.bridge.UnoUrlResolver", localContext)
        url = "uno:socket,host=localhost,port=%d;urp;StarOffice.ComponentContext" % self.port
        context = None

        for attempt in range(self.CONNECT_ATTEMPTS):
            try:
                context = resolver.resolve(url)
                break
            except NoConnectException:
                pass

            # If first connect failed then try starting OpenOffice.
            if attempt == 0:
                # Give up immediately if startup is not desired.
                if no_startup:
                    break
                self.startup()

            # Pause and try again to connect; no point sleeping after the
            # final attempt.
            if attempt + 1 < self.CONNECT_ATTEMPTS:
                time.sleep(self.CONNECT_RETRY_DELAY)

        if not context:
            raise Exception("Failed to connect to OpenOffice on port %d" % self.port)

        desktop = context.ServiceManager.createInstanceWithContext(
            "com.sun.star.frame.Desktop", context)

        if not desktop:
            raise Exception("Failed to create OpenOffice desktop on port %d" % self.port)

        return desktop

    def startup(self):
        """
        Start a headless instance of OpenOffice listening on self.port.

        The process is spawned without waiting for it.  Raises Exception if
        no OpenOffice installation was located or the process could not be
        spawned.
        """
        # Looked up dynamically so that a missing installation is reported
        # as a clear error rather than a NameError.
        office_bin = globals().get('OPENOFFICE_BIN')
        if not office_bin:
            raise Exception("Failed to start OpenOffice on port %d: "
                            "no OpenOffice installation found" % self.port)

        args = [office_bin,
                '-accept=socket,host=localhost,port=%d;urp;StarOffice.ServiceManager' % self.port,
                '-norestore',
                '-nofirststartwizard',
                '-nologo',
                '-headless',
                ]
        env = {'PATH': '/bin:/usr/bin:%s' % OPENOFFICE_PATH,
               'PYTHONPATH': OPENOFFICE_LIBPATH,
               }

        try:
            pid = os.spawnve(os.P_NOWAIT, args[0], args, env)
        except Exception as e:
            # Format the exception itself: e.message is deprecated and is
            # empty for OSError, which would hide the actual cause.
            raise Exception("Failed to start OpenOffice on port %d: %s" % (self.port, e))

        if pid <= 0:
            raise Exception("Failed to start OpenOffice on port %d" % self.port)

    def shutdown(self):
        """
        Shutdown OpenOffice.

        Connects without starting a new instance and asks the desktop to
        terminate.  Raises Exception if connecting or terminating fails.
        """
        try:
            desktop = self.connect(True)
            if desktop:
                desktop.terminate()
        except Exception as e:
            raise Exception("Failed to shutdown the process: %s" % e)
 
225
 
 
226
 
 
227
 
 
228
 
 
229
class DocumentConversionException(Exception):
    """Raised when a document cannot be loaded, recognised or converted."""

    def __init__(self, message):
        # Initialise the base class so that args, repr() and pickling carry
        # the message as well.
        Exception.__init__(self, message)
        self.message = message

    def __str__(self):
        return self.message
 
236
 
 
237
 
 
238
class DocumentConverter:
    """
    Convert documents between office formats through a running
    OpenOffice.org instance reached over a local UNO socket.
    """

    def __init__(self, port=DEFAULT_OPENOFFICE_PORT):
        """
        Connect to OpenOffice.org on localhost at the given port.

        Raises DocumentConversionException if no connection can be made.
        """
        localContext = uno.getComponentContext()
        resolver = localContext.ServiceManager.createInstanceWithContext(
            "com.sun.star.bridge.UnoUrlResolver", localContext)
        try:
            context = resolver.resolve(
                "uno:socket,host=localhost,port=%s;urp;StarOffice.ComponentContext" % port)
        except NoConnectException:
            raise DocumentConversionException(
                "failed to connect to OpenOffice.org on port %s" % port)
        self.desktop = context.ServiceManager.createInstanceWithContext(
            "com.sun.star.frame.Desktop", context)

    def terminate(self):
        """Ask the connected OpenOffice.org desktop to terminate."""
        self.desktop.terminate()

    def convert(self, inputFile, outputFile):
        """
        Load inputFile and store it as outputFile.

        The output format is chosen from outputFile's extension together
        with the detected family of the loaded document.

        Raises DocumentConversionException if the document cannot be
        loaded, its family is unknown, or the requested output format is
        unknown or unsupported for that family.
        """
        inputUrl = self._toFileUrl(inputFile)
        outputUrl = self._toFileUrl(outputFile)

        loadProperties = {"Hidden": True}
        inputExt = self._getFileExt(inputFile)
        if inputExt in IMPORT_FILTER_MAP:
            loadProperties.update(IMPORT_FILTER_MAP[inputExt])

        document = self.desktop.loadComponentFromURL(
            inputUrl, "_blank", 0, self._toProperties(loadProperties))
        if document is None:
            # Without this check a failed load surfaces later as an
            # AttributeError on None.
            raise DocumentConversionException(
                "failed to load document: '%s'" % inputFile)

        # Everything after a successful load sits inside try/finally so the
        # document is closed even when family detection or filter lookup
        # fails; otherwise it would stay open in the office process.
        try:
            try:
                document.refresh()
            except AttributeError:
                # Not every document type provides refresh().
                pass

            family = self._detectFamily(document)
            self._overridePageStyleProperties(document, family)

            outputExt = self._getFileExt(outputFile)
            storeProperties = self._getStoreProperties(document, outputExt)

            document.storeToURL(outputUrl, self._toProperties(storeProperties))
        finally:
            document.close(True)

    def _overridePageStyleProperties(self, document, family):
        """Apply the configured overrides for family to every page style."""
        if family not in PAGE_STYLE_OVERRIDE_PROPERTIES:
            return
        properties = PAGE_STYLE_OVERRIDE_PROPERTIES[family]
        pageStyles = document.getStyleFamilies().getByName('PageStyles')
        for styleName in pageStyles.getElementNames():
            pageStyle = pageStyles.getByName(styleName)
            for name, value in properties.items():
                pageStyle.setPropertyValue(name, value)

    def _getStoreProperties(self, document, outputExt):
        """
        Return the store properties for exporting document as outputExt.

        Raises DocumentConversionException if outputExt is not configured,
        or is not configured for the document's family.
        """
        family = self._detectFamily(document)
        try:
            propertiesByFamily = EXPORT_FILTER_MAP[outputExt]
        except KeyError:
            raise DocumentConversionException(
                "unknown output format: '%s'" % outputExt)
        try:
            return propertiesByFamily[family]
        except KeyError:
            raise DocumentConversionException(
                "unsupported conversion: from '%s' to '%s'" % (family, outputExt))

    def _detectFamily(self, document):
        """
        Return the FAMILY_* constant matching the services document supports.

        Raises DocumentConversionException if none of the known services
        is supported.
        """
        # The web check comes before the generic text check; presumably web
        # documents also report GenericTextDocument -- confirm before
        # reordering.
        if document.supportsService("com.sun.star.text.WebDocument"):
            return FAMILY_WEB
        if document.supportsService("com.sun.star.text.GenericTextDocument"):
            # must be TextDocument or GlobalDocument
            return FAMILY_TEXT
        if document.supportsService("com.sun.star.sheet.SpreadsheetDocument"):
            return FAMILY_SPREADSHEET
        if document.supportsService("com.sun.star.presentation.PresentationDocument"):
            return FAMILY_PRESENTATION
        if document.supportsService("com.sun.star.drawing.DrawingDocument"):
            return FAMILY_DRAWING
        raise DocumentConversionException("unknown document family: %s" % document)

    def _getFileExt(self, path):
        """Return path's extension, lower-cased and without the dot ('' if none)."""
        # splitext always returns a string, so no None check is needed.
        return splitext(path)[1][1:].lower()

    def _toFileUrl(self, path):
        """Return the file URL for the absolute form of path."""
        return uno.systemPathToFileUrl(abspath(path))

    def _toProperties(self, dict):
        """Return a tuple of PropertyValue objects, one per entry of the mapping."""
        # The parameter keeps its original name (which shadows the builtin)
        # so that existing keyword callers continue to work.
        props = []
        for key, value in dict.items():
            prop = PropertyValue()
            prop.Name = key
            prop.Value = value
            props.append(prop)
        return tuple(props)
 
329
 
 
330
 
 
331
if __name__ == "__main__":
    # Command-line entry point:
    #   <script> --daemon                    start a headless OpenOffice
    #   <script> --shutdown                  terminate the running OpenOffice
    #   <script> <input-file> <output-file>  convert a document
    from sys import argv, exit

    # argv is length-checked before indexing so that running the script with
    # no arguments prints the usage text instead of raising IndexError.
    if len(argv) > 1 and argv[1] == "--daemon":
        try:
            service = OOService()
            service.startup()
        except Exception as e:
            # OOService reports failures as plain Exception, so catching
            # only ErrorCodeIOException would let them escape as tracebacks.
            print("Failed to start daemon process: %s" % e)
            exit(1)
        exit(0)

    if len(argv) > 1 and argv[1] == "--shutdown":
        try:
            service = OOService()
            service.shutdown()
        except Exception as e:
            print("Failed to shut down daemon process: %s" % e)
            exit(1)
        exit(0)

    if len(argv) < 3:
        print("USAGE: python %s <input-file> <output-file>" % argv[0])
        exit(255)
    elif not isfile(argv[1]):
        print("no such input file: %s" % argv[1])
        exit(1)

    try:
        converter = DocumentConverter()
        converter.convert(argv[1], argv[2])
    except DocumentConversionException as exception:
        print("ERROR! " + str(exception))
        exit(1)
    except ErrorCodeIOException as exception:
        print("ERROR! ErrorCodeIOException %d" % exception.ErrCode)
        exit(1)
 
367