#!/usr/bin/python
#
# PyODConverter (Python OpenDocument Converter) v1.0.0 - 2008-05-05
#
# This script converts a document from one office format to another by
# connecting to an OpenOffice.org instance via Python-UNO bridge.
#
# Copyright (C) 2008 Mirko Nasato
# Licensed under the GNU LGPL v2.1 - http://www.gnu.org/licenses/lgpl-2.1.html
# - or any later version.
#
# NOTE: the syntax used below (call-style raise, sys.exc_info() instead of
# "except X, e", parenthesised single-argument print) is deliberately limited
# to forms that parse under both Python 2 and Python 3.
#
DEFAULT_OPENOFFICE_PORT = 8100

import uno
from os.path import abspath, isfile, splitext
from com.sun.star.beans import PropertyValue
from com.sun.star.task import ErrorCodeIOException
from com.sun.star.connection import NoConnectException

FAMILY_TEXT = "Text"
FAMILY_SPREADSHEET = "Spreadsheet"
FAMILY_PRESENTATION = "Presentation"
FAMILY_DRAWING = "Drawing"

# Maps output file extension -> document family -> export filter name.
FILTER_MAP = {
    "pdf": {
        FAMILY_TEXT: "writer_pdf_Export",
        FAMILY_SPREADSHEET: "calc_pdf_Export",
        FAMILY_PRESENTATION: "impress_pdf_Export",
        FAMILY_DRAWING: "draw_pdf_Export"
    },
    "html": {
        FAMILY_TEXT: "HTML (StarWriter)",
        FAMILY_SPREADSHEET: "HTML (StarCalc)",
        FAMILY_PRESENTATION: "impress_html_Export"
    },
    "odt": { FAMILY_TEXT: "writer8" },
    "doc": { FAMILY_TEXT: "MS Word 97" },
    "rtf": { FAMILY_TEXT: "Rich Text Format" },
    "txt": { FAMILY_TEXT: "Text" },
    "ods": { FAMILY_SPREADSHEET: "calc8" },
    "xls": { FAMILY_SPREADSHEET: "MS Excel 97" },
    "odp": { FAMILY_PRESENTATION: "impress8" },
    "ppt": { FAMILY_PRESENTATION: "MS PowerPoint 97" },
    "swf": { FAMILY_PRESENTATION: "impress_flash_Export" }
}
# see http://wiki.services.openoffice.org/wiki/Framework/Article/Filter
# for more available filters


class DocumentConversionException(Exception):
    """Raised when a document cannot be loaded or converted."""

    def __init__(self, message):
        # Initialise the base class too, so that ``args`` carries the message.
        Exception.__init__(self, message)
        self.message = message

    def __str__(self):
        return self.message


class DocumentConverter:
    """Converts documents through an office instance reached over a UNO socket."""

    def __init__(self, port=DEFAULT_OPENOFFICE_PORT):
        """Connect to the office instance listening on localhost:``port``.

        Raises DocumentConversionException if the connection is refused.
        """
        localContext = uno.getComponentContext()
        resolver = localContext.ServiceManager.createInstanceWithContext(
            "com.sun.star.bridge.UnoUrlResolver", localContext)
        try:
            context = resolver.resolve(
                "uno:socket,host=localhost,port=%s;urp;StarOffice.ComponentContext" % port)
        except NoConnectException:
            raise DocumentConversionException(
                "failed to connect to OpenOffice.org on port %s" % port)
        self.desktop = context.ServiceManager.createInstanceWithContext(
            "com.sun.star.frame.Desktop", context)

    def convert(self, inputFile, outputFile):
        """Load ``inputFile`` and store it as ``outputFile``.

        The export filter is chosen from the extension of ``outputFile`` and
        the family of the loaded document (see FILTER_MAP).

        Raises DocumentConversionException if the document cannot be loaded,
        the output extension is unknown, or the conversion is unsupported.
        """
        inputUrl = self._toFileUrl(inputFile)
        outputUrl = self._toFileUrl(outputFile)

        document = self.desktop.loadComponentFromURL(
            inputUrl, "_blank", 0, self._toProperties(Hidden=True))
        if document is None:
            # The loader reports some failures by returning nothing rather
            # than raising; surface that as a conversion error.
            raise DocumentConversionException(
                "failed to load input document: %s" % inputFile)

        # Everything after a successful load runs under the finally clause so
        # the hidden document is closed even when filter lookup fails.
        try:
            try:
                document.refresh()
            except AttributeError:
                # Best effort: the loaded document has no refresh() method.
                pass

            outputExt = self._getFileExt(outputFile)
            filterName = self._filterName(document, outputExt)
            document.storeToURL(outputUrl, self._toProperties(FilterName=filterName))
        finally:
            document.close(True)

    def _filterName(self, document, outputExt):
        """Return the export filter name for ``document`` and ``outputExt``."""
        family = self._detectFamily(document)
        try:
            filterByFamily = FILTER_MAP[outputExt]
        except KeyError:
            raise DocumentConversionException(
                "unknown output format: '%s'" % outputExt)
        try:
            return filterByFamily[family]
        except KeyError:
            raise DocumentConversionException(
                "unsupported conversion: from '%s' to '%s'" % (family, outputExt))

    def _detectFamily(self, document):
        """Return the FAMILY_* constant matching the services ``document`` supports."""
        if document.supportsService("com.sun.star.text.GenericTextDocument"):
            # NOTE: a GenericTextDocument is either a TextDocument, a WebDocument, or a GlobalDocument
            # but this further distinction doesn't seem to matter for conversions
            return FAMILY_TEXT
        if document.supportsService("com.sun.star.sheet.SpreadsheetDocument"):
            return FAMILY_SPREADSHEET
        if document.supportsService("com.sun.star.presentation.PresentationDocument"):
            return FAMILY_PRESENTATION
        if document.supportsService("com.sun.star.drawing.DrawingDocument"):
            return FAMILY_DRAWING
        raise DocumentConversionException("unknown document family: %s" % document)

    def _getFileExt(self, path):
        """Return the lower-cased extension of ``path`` without the dot ("" if none)."""
        # splitext() always returns a string, so no None check is needed.
        return splitext(path)[1][1:].lower()

    def _toFileUrl(self, path):
        """Return the file URL for the absolute form of ``path``."""
        return uno.systemPathToFileUrl(abspath(path))

    def _toProperties(self, **args):
        """Return a tuple of PropertyValue structs, one per keyword argument."""
        props = []
        for key in args:
            prop = PropertyValue()
            prop.Name = key
            prop.Value = args[key]
            props.append(prop)
        return tuple(props)


if __name__ == "__main__":
    from sys import argv, exit, exc_info

    if len(argv) < 3:
        print("USAGE: python %s <input-file> <output-file>" % argv[0])
        exit(255)
    if not isfile(argv[1]):
        print("no such input file: %s" % argv[1])
        exit(1)

    try:
        converter = DocumentConverter()
        converter.convert(argv[1], argv[2])
    except DocumentConversionException:
        # exc_info() is used instead of "except X, e" / "except X as e" so
        # this handler parses on every Python 2 and Python 3 release.
        exception = exc_info()[1]
        print("ERROR! " + str(exception))
        exit(1)
    except ErrorCodeIOException:
        exception = exc_info()[1]
        print("ERROR! ErrorCodeIOException %d" % exception.ErrCode)
        exit(1)