cg
diff DocumentConverter.py @ 9:3480ab8239f5
.
author | bshanks@bshanks.dyndns.org |
---|---|
date | Sat Apr 11 21:34:35 2009 -0700 (16 years ago) |
parents | |
children |
line diff
1.1 --- /dev/null Thu Jan 01 00:00:00 1970 +0000
1.2 +++ b/DocumentConverter.py Sat Apr 11 21:34:35 2009 -0700
1.3 @@ -0,0 +1,151 @@
1.4 +#!/usr/bin/python
1.5 +#
1.6 +# PyODConverter (Python OpenDocument Converter) v1.0.0 - 2008-05-05
1.7 +#
1.8 +# This script converts a document from one office format to another by
1.9 +# connecting to an OpenOffice.org instance via Python-UNO bridge.
1.10 +#
1.11 +# Copyright (C) 2008 Mirko Nasato <mirko@artofsolving.com>
1.12 +# Licensed under the GNU LGPL v2.1 - http://www.gnu.org/licenses/lgpl-2.1.html
1.13 +# - or any later version.
1.14 +#
# Port on localhost that DocumentConverter connects to when the caller does
# not pass an explicit ``port`` argument.
DEFAULT_OPENOFFICE_PORT = 8100
1.16 +
1.17 +import uno
1.18 +from os.path import abspath, isfile, splitext
1.19 +from com.sun.star.beans import PropertyValue
1.20 +from com.sun.star.task import ErrorCodeIOException
1.21 +from com.sun.star.connection import NoConnectException
1.22 +
# Document families, as reported by DocumentConverter._detectFamily().
FAMILY_TEXT = "Text"
FAMILY_SPREADSHEET = "Spreadsheet"
FAMILY_PRESENTATION = "Presentation"
FAMILY_DRAWING = "Drawing"

# Export filter lookup table: output file extension -> document family ->
# OpenOffice.org filter name handed to storeToURL() as "FilterName".
# An extension missing here is an unknown output format; a family missing
# under an extension is an unsupported conversion.
FILTER_MAP = {
    "pdf": {
        FAMILY_TEXT: "writer_pdf_Export",
        FAMILY_SPREADSHEET: "calc_pdf_Export",
        FAMILY_PRESENTATION: "impress_pdf_Export",
        FAMILY_DRAWING: "draw_pdf_Export"
    },
    "html": {
        FAMILY_TEXT: "HTML (StarWriter)",
        FAMILY_SPREADSHEET: "HTML (StarCalc)",
        FAMILY_PRESENTATION: "impress_html_Export"
    },
    "odt": { FAMILY_TEXT: "writer8" },
    "doc": { FAMILY_TEXT: "MS Word 97" },
    "rtf": { FAMILY_TEXT: "Rich Text Format" },
    "txt": { FAMILY_TEXT: "Text" },
    "ods": { FAMILY_SPREADSHEET: "calc8" },
    "xls": { FAMILY_SPREADSHEET: "MS Excel 97" },
    "odp": { FAMILY_PRESENTATION: "impress8" },
    "ppt": { FAMILY_PRESENTATION: "MS PowerPoint 97" },
    # Native Draw format, so drawings have more than a PDF target.
    "odg": { FAMILY_DRAWING: "draw8" },
    "swf": {
        FAMILY_PRESENTATION: "impress_flash_Export",
        FAMILY_DRAWING: "draw_flash_Export"
    }
}
# see http://wiki.services.openoffice.org/wiki/Framework/Article/Filter
# for more available filters
1.52 +
1.53 +
class DocumentConversionException(Exception):
    """Raised when a document cannot be converted.

    The human-readable reason is available as ``message`` and is also what
    ``str()`` returns.
    """

    def __init__(self, message):
        # Forward the message to the base class so that ``args``, ``repr()``
        # and pickling carry it too, instead of seeing an empty exception.
        Exception.__init__(self, message)
        self.message = message

    def __str__(self):
        return self.message
1.61 +
1.62 +
class DocumentConverter:
    """Convert documents between office formats via OpenOffice.org.

    An instance holds a ``com.sun.star.frame.Desktop`` obtained over the
    Python-UNO bridge from an OpenOffice.org instance listening on a local
    socket, and uses it to load a document and store it in another format.
    """

    def __init__(self, port=DEFAULT_OPENOFFICE_PORT):
        """Connect to OpenOffice.org on ``localhost:port``.

        Raises DocumentConversionException if the connection is refused.
        """
        localContext = uno.getComponentContext()
        resolver = localContext.ServiceManager.createInstanceWithContext(
            "com.sun.star.bridge.UnoUrlResolver", localContext)
        try:
            context = resolver.resolve(
                "uno:socket,host=localhost,port=%s;urp;StarOffice.ComponentContext" % port)
        except NoConnectException:
            raise DocumentConversionException(
                "failed to connect to OpenOffice.org on port %s" % port)
        self.desktop = context.ServiceManager.createInstanceWithContext(
            "com.sun.star.frame.Desktop", context)

    def convert(self, inputFile, outputFile):
        """Load ``inputFile`` and store it as ``outputFile``.

        The output format is chosen from the extension of ``outputFile``
        together with the family of the loaded document (see FILTER_MAP).

        Raises DocumentConversionException if the input cannot be loaded,
        the output extension is unknown, or the document family cannot be
        written in that format. Errors raised by ``storeToURL`` itself are
        propagated unchanged.
        """
        inputUrl = self._toFileUrl(inputFile)
        outputUrl = self._toFileUrl(outputFile)
        outputExt = self._getFileExt(outputFile)

        document = self.desktop.loadComponentFromURL(
            inputUrl, "_blank", 0, self._toProperties(Hidden=True))
        if document is None:
            # Fail with the module's own exception rather than an
            # AttributeError from the first method call on None.
            raise DocumentConversionException(
                "failed to load input file: %s" % inputFile)

        # Everything after a successful load runs under the finally clause,
        # so a failed filter lookup no longer leaves the document open in
        # the office instance.
        try:
            try:
                document.refresh()
            except AttributeError:
                # Presumably not every document type offers refresh();
                # it is treated as optional.
                pass
            filterName = self._filterName(document, outputExt)
            document.storeToURL(outputUrl, self._toProperties(FilterName=filterName))
        finally:
            document.close(True)

    def _filterName(self, document, outputExt):
        """Return the export filter name for ``document`` and ``outputExt``.

        Raises DocumentConversionException for an unknown extension or for
        a family/extension pair that FILTER_MAP does not list.
        """
        family = self._detectFamily(document)
        try:
            filterByFamily = FILTER_MAP[outputExt]
        except KeyError:
            raise DocumentConversionException(
                "unknown output format: '%s'" % outputExt)
        try:
            return filterByFamily[family]
        except KeyError:
            raise DocumentConversionException(
                "unsupported conversion: from '%s' to '%s'" % (family, outputExt))

    def _detectFamily(self, document):
        """Return the FAMILY_* constant matching the services of ``document``.

        Raises DocumentConversionException if none of the known services is
        supported.
        """
        if document.supportsService("com.sun.star.text.GenericTextDocument"):
            # NOTE: a GenericTextDocument is either a TextDocument, a WebDocument, or a GlobalDocument
            # but this further distinction doesn't seem to matter for conversions
            return FAMILY_TEXT
        if document.supportsService("com.sun.star.sheet.SpreadsheetDocument"):
            return FAMILY_SPREADSHEET
        if document.supportsService("com.sun.star.presentation.PresentationDocument"):
            return FAMILY_PRESENTATION
        if document.supportsService("com.sun.star.drawing.DrawingDocument"):
            return FAMILY_DRAWING
        raise DocumentConversionException("unknown document family: %s" % document)

    def _getFileExt(self, path):
        """Return the lower-cased extension of ``path`` without the dot.

        Returns an empty string when ``path`` has no extension.
        """
        # splitext() always returns a string for the extension ("" when
        # there is none), so no None check is needed before slicing.
        return splitext(path)[1][1:].lower()

    def _toFileUrl(self, path):
        """Return the file URL for ``path``, made absolute first."""
        return uno.systemPathToFileUrl(abspath(path))

    def _toProperties(self, **args):
        """Return a tuple of PropertyValue objects, one per keyword argument."""
        props = []
        for key, value in args.items():
            prop = PropertyValue()
            prop.Name = key
            prop.Value = value
            props.append(prop)
        return tuple(props)
1.133 +
1.134 +
# Command-line entry point: python DocumentConverter.py <input-file> <output-file>
# Exit status is 255 for a usage error and 1 for a missing input file or a
# failed conversion.
if __name__ == "__main__":
    from sys import argv, exit

    # print(...) with one argument and "except ... as ..." are used so that
    # this block parses on Python 2.6+ as well as Python 3.
    if len(argv) < 3:
        print("USAGE: python %s <input-file> <output-file>" % argv[0])
        exit(255)
    if not isfile(argv[1]):
        print("no such input file: %s" % argv[1])
        exit(1)

    try:
        converter = DocumentConverter()
        converter.convert(argv[1], argv[2])
    except DocumentConversionException as exception:
        # A space now follows "ERROR!", matching the message format of the
        # handler below.
        print("ERROR! " + str(exception))
        exit(1)
    except ErrorCodeIOException as exception:
        print("ERROR! ErrorCodeIOException %d" % exception.ErrCode)
        exit(1)
1.154 +