cg

diff DocumentConverter.py @ 8:3bc61ab8e776

.
author bshanks@bshanks.dyndns.org
date Sat Apr 11 21:31:27 2009 -0700 (16 years ago)
parents
children
line diff
#!/usr/bin/python
#
# PyODConverter (Python OpenDocument Converter) v1.0.0 - 2008-05-05
#
# This script converts a document from one office format to another by
# connecting to an OpenOffice.org instance via Python-UNO bridge.
#
# Copyright (C) 2008 Mirko Nasato <mirko@artofsolving.com>
# Licensed under the GNU LGPL v2.1 - http://www.gnu.org/licenses/lgpl-2.1.html
# - or any later version.
#
"""Convert office documents through an OpenOffice.org instance reached over UNO.

The syntax used here is valid on both Python 2.6+ and Python 3.
"""

from os.path import abspath, isfile, splitext

# NOTE: ``uno`` is imported before the ``com.sun.star`` names, exactly as in
# the original file; keep this order.
import uno
from com.sun.star.beans import PropertyValue
from com.sun.star.task import ErrorCodeIOException
from com.sun.star.connection import NoConnectException

DEFAULT_OPENOFFICE_PORT = 8100

FAMILY_TEXT = "Text"
FAMILY_SPREADSHEET = "Spreadsheet"
FAMILY_PRESENTATION = "Presentation"
FAMILY_DRAWING = "Drawing"

# Output extension -> {document family -> export filter name}.
FILTER_MAP = {
    "pdf": {
        FAMILY_TEXT: "writer_pdf_Export",
        FAMILY_SPREADSHEET: "calc_pdf_Export",
        FAMILY_PRESENTATION: "impress_pdf_Export",
        FAMILY_DRAWING: "draw_pdf_Export",
    },
    "html": {
        FAMILY_TEXT: "HTML (StarWriter)",
        FAMILY_SPREADSHEET: "HTML (StarCalc)",
        FAMILY_PRESENTATION: "impress_html_Export",
    },
    "odt": {FAMILY_TEXT: "writer8"},
    "doc": {FAMILY_TEXT: "MS Word 97"},
    "rtf": {FAMILY_TEXT: "Rich Text Format"},
    "txt": {FAMILY_TEXT: "Text"},
    "ods": {FAMILY_SPREADSHEET: "calc8"},
    "xls": {FAMILY_SPREADSHEET: "MS Excel 97"},
    "odp": {FAMILY_PRESENTATION: "impress8"},
    "ppt": {FAMILY_PRESENTATION: "MS PowerPoint 97"},
    "swf": {FAMILY_PRESENTATION: "impress_flash_Export"},
}
# see http://wiki.services.openoffice.org/wiki/Framework/Article/Filter
# for more available filters


class DocumentConversionException(Exception):
    """Raised when a conversion cannot be set up or carried out.

    The human-readable reason is available as ``message`` and via ``str()``.
    """

    def __init__(self, message):
        # Initialise the base class too so that ``args`` is populated.
        super(DocumentConversionException, self).__init__(message)
        self.message = message

    def __str__(self):
        return self.message


class DocumentConverter(object):
    """Converts documents by driving an OpenOffice.org instance on localhost."""

    def __init__(self, port=DEFAULT_OPENOFFICE_PORT):
        """Connect to the office instance listening on ``localhost:port``.

        Raises:
            DocumentConversionException: if no connection can be established.
        """
        localContext = uno.getComponentContext()
        resolver = localContext.ServiceManager.createInstanceWithContext(
            "com.sun.star.bridge.UnoUrlResolver", localContext)
        try:
            context = resolver.resolve(
                "uno:socket,host=localhost,port=%s;urp;StarOffice.ComponentContext" % port)
        except NoConnectException:
            raise DocumentConversionException(
                "failed to connect to OpenOffice.org on port %s" % port)
        self.desktop = context.ServiceManager.createInstanceWithContext(
            "com.sun.star.frame.Desktop", context)

    def convert(self, inputFile, outputFile):
        """Load ``inputFile`` and store it as ``outputFile``.

        The export filter is chosen from the extension of ``outputFile`` and
        the family of the loaded document (see ``FILTER_MAP``).

        Raises:
            DocumentConversionException: if the input cannot be loaded, the
                output extension is unknown, or the document family cannot be
                exported to that format.
        """
        inputUrl = self._toFileUrl(inputFile)
        outputUrl = self._toFileUrl(outputFile)

        document = self.desktop.loadComponentFromURL(
            inputUrl, "_blank", 0, self._toProperties(Hidden=True))
        if document is None:
            # No component was returned; report it as a conversion error
            # instead of failing later with an AttributeError on None.
            raise DocumentConversionException(
                "failed to load input document: '%s'" % inputFile)

        # Everything after a successful load runs under try/finally so the
        # document is closed even when the filter lookup fails.
        try:
            try:
                document.refresh()
            except AttributeError:
                # Documents that do not expose refresh() are converted as loaded.
                pass

            outputExt = self._getFileExt(outputFile)
            filterName = self._filterName(document, outputExt)
            document.storeToURL(outputUrl, self._toProperties(FilterName=filterName))
        finally:
            document.close(True)

    def _filterName(self, document, outputExt):
        """Return the export filter name for ``document`` and ``outputExt``.

        Raises:
            DocumentConversionException: if the extension is not in
                ``FILTER_MAP`` or has no filter for the document's family.
        """
        family = self._detectFamily(document)
        try:
            filterByFamily = FILTER_MAP[outputExt]
        except KeyError:
            raise DocumentConversionException(
                "unknown output format: '%s'" % outputExt)
        try:
            return filterByFamily[family]
        except KeyError:
            raise DocumentConversionException(
                "unsupported conversion: from '%s' to '%s'" % (family, outputExt))

    def _detectFamily(self, document):
        """Return the ``FAMILY_*`` constant matching the services of ``document``.

        Raises:
            DocumentConversionException: if none of the known services match.
        """
        if document.supportsService("com.sun.star.text.GenericTextDocument"):
            # NOTE: a GenericTextDocument is either a TextDocument, a WebDocument, or a GlobalDocument
            # but this further distinction doesn't seem to matter for conversions
            return FAMILY_TEXT
        if document.supportsService("com.sun.star.sheet.SpreadsheetDocument"):
            return FAMILY_SPREADSHEET
        if document.supportsService("com.sun.star.presentation.PresentationDocument"):
            return FAMILY_PRESENTATION
        if document.supportsService("com.sun.star.drawing.DrawingDocument"):
            return FAMILY_DRAWING
        raise DocumentConversionException("unknown document family: %s" % document)

    def _getFileExt(self, path):
        """Return the lower-cased extension of ``path`` without the dot.

        Returns an empty string when ``path`` has no extension.
        """
        # splitext() always returns a string for the extension (possibly
        # empty), so no None check is needed.
        return splitext(path)[1][1:].lower()

    def _toFileUrl(self, path):
        """Return the file URL for the absolute form of ``path``."""
        return uno.systemPathToFileUrl(abspath(path))

    def _toProperties(self, **args):
        """Return a tuple of ``PropertyValue`` objects, one per keyword argument."""
        props = []
        for key, value in args.items():
            prop = PropertyValue()
            prop.Name = key
            prop.Value = value
            props.append(prop)
        return tuple(props)


if __name__ == "__main__":
    import sys

    if len(sys.argv) < 3:
        print("USAGE: python %s <input-file> <output-file>" % sys.argv[0])
        sys.exit(255)
    if not isfile(sys.argv[1]):
        print("no such input file: %s" % sys.argv[1])
        sys.exit(1)

    try:
        converter = DocumentConverter()
        converter.convert(sys.argv[1], sys.argv[2])
    except DocumentConversionException as exception:
        print("ERROR! " + str(exception))
        sys.exit(1)
    except ErrorCodeIOException as exception:
        print("ERROR! ErrorCodeIOException %d" % exception.ErrCode)
        sys.exit(1)