rev |
line source |
bshanks@8 | 1 #!/usr/bin/python
|
bshanks@8 | 2 #
|
bshanks@8 | 3 # PyODConverter (Python OpenDocument Converter) v1.0.0 - 2008-05-05
|
bshanks@8 | 4 #
|
bshanks@8 | 5 # This script converts a document from one office format to another by
|
bshanks@8 | 6 # connecting to an OpenOffice.org instance via Python-UNO bridge.
|
bshanks@8 | 7 #
|
bshanks@8 | 8 # Copyright (C) 2008 Mirko Nasato <mirko@artofsolving.com>
|
bshanks@8 | 9 # Licensed under the GNU LGPL v2.1 - http://www.gnu.org/licenses/lgpl-2.1.html
|
bshanks@8 | 10 # - or any later version.
|
bshanks@8 | 11 #
|
# Port used by DocumentConverter when the caller does not pass one; it is
# substituted into the "uno:socket,host=localhost,port=..." connection URL.
DEFAULT_OPENOFFICE_PORT = 8100
|
bshanks@8 | 13
|
bshanks@8 | 14 import uno
|
bshanks@8 | 15 from os.path import abspath, isfile, splitext
|
bshanks@8 | 16 from com.sun.star.beans import PropertyValue
|
bshanks@8 | 17 from com.sun.star.task import ErrorCodeIOException
|
bshanks@8 | 18 from com.sun.star.connection import NoConnectException
|
bshanks@8 | 19
|
# Document families distinguished by the converter.  They are the inner keys
# of FILTER_MAP below.
FAMILY_TEXT = "Text"
FAMILY_SPREADSHEET = "Spreadsheet"
FAMILY_PRESENTATION = "Presentation"
FAMILY_DRAWING = "Drawing"

# Export filter lookup: output file extension -> {document family: filter name}.
# An extension missing here is an unknown output format; a family missing under
# an extension is an unsupported conversion.
#
# see http://wiki.services.openoffice.org/wiki/Framework/Article/Filter
# for more available filters
FILTER_MAP = {
    "pdf": {
        FAMILY_TEXT: "writer_pdf_Export",
        FAMILY_SPREADSHEET: "calc_pdf_Export",
        FAMILY_PRESENTATION: "impress_pdf_Export",
        FAMILY_DRAWING: "draw_pdf_Export",
    },
    "html": {
        FAMILY_TEXT: "HTML (StarWriter)",
        FAMILY_SPREADSHEET: "HTML (StarCalc)",
        FAMILY_PRESENTATION: "impress_html_Export",
    },
    "odt": {
        FAMILY_TEXT: "writer8",
    },
    "doc": {
        FAMILY_TEXT: "MS Word 97",
    },
    "rtf": {
        FAMILY_TEXT: "Rich Text Format",
    },
    "txt": {
        FAMILY_TEXT: "Text",
    },
    "ods": {
        FAMILY_SPREADSHEET: "calc8",
    },
    "xls": {
        FAMILY_SPREADSHEET: "MS Excel 97",
    },
    "odp": {
        FAMILY_PRESENTATION: "impress8",
    },
    "ppt": {
        FAMILY_PRESENTATION: "MS PowerPoint 97",
    },
    "swf": {
        FAMILY_PRESENTATION: "impress_flash_Export",
    },
}
|
bshanks@8 | 49
|
bshanks@8 | 50
|
class DocumentConversionException(Exception):
    """Error raised when a document cannot be converted.

    The human-readable reason is stored in ``message`` and is what ``str()``
    returns.
    """

    def __init__(self, message):
        # Initialise the base class as well so that ``args`` (and with it
        # repr() and pickling) carries the message instead of staying empty.
        Exception.__init__(self, message)
        self.message = message

    def __str__(self):
        return self.message
|
bshanks@8 | 58
|
bshanks@8 | 59
|
class DocumentConverter:
    """Converts documents by driving an OpenOffice.org instance over UNO.

    The constructor resolves the remote component context through a socket
    connection to localhost and keeps a ``com.sun.star.frame.Desktop`` service
    in ``self.desktop``; convert() uses it to load and re-store documents.
    """

    def __init__(self, port=DEFAULT_OPENOFFICE_PORT):
        """Connect to the office instance on localhost at *port*.

        Raises:
            DocumentConversionException: the resolver reported
                NoConnectException for the given port.
        """
        localContext = uno.getComponentContext()
        resolver = localContext.ServiceManager.createInstanceWithContext(
            "com.sun.star.bridge.UnoUrlResolver", localContext)
        try:
            context = resolver.resolve(
                "uno:socket,host=localhost,port=%s;urp;StarOffice.ComponentContext" % port)
        except NoConnectException:
            raise DocumentConversionException(
                "failed to connect to OpenOffice.org on port %s" % port)
        self.desktop = context.ServiceManager.createInstanceWithContext(
            "com.sun.star.frame.Desktop", context)

    def convert(self, inputFile, outputFile):
        """Load *inputFile* and store it as *outputFile*.

        The export filter is picked from the extension of *outputFile* and the
        family of the loaded document (see FILTER_MAP).

        Raises:
            DocumentConversionException: the input could not be loaded, the
                output extension is unknown, the document family is unknown,
                or the family cannot be exported to that extension.
        """
        inputUrl = self._toFileUrl(inputFile)
        outputUrl = self._toFileUrl(outputFile)

        document = self.desktop.loadComponentFromURL(
            inputUrl, "_blank", 0, self._toProperties(Hidden=True))
        if document is None:
            # No component came back from the loader; report it as a conversion
            # error instead of failing later with an AttributeError on None.
            raise DocumentConversionException(
                "failed to load input file: %s" % inputFile)

        # Everything after a successful load runs under try/finally so the
        # hidden document is closed even when the filter lookup raises.
        try:
            try:
                document.refresh()
            except AttributeError:
                # Best effort: a document object that offers no refresh() is
                # stored exactly as it was loaded.
                pass

            outputExt = self._getFileExt(outputFile)
            filterName = self._filterName(document, outputExt)
            document.storeToURL(outputUrl, self._toProperties(FilterName=filterName))
        finally:
            document.close(True)

    def _filterName(self, document, outputExt):
        """Return the export filter name for *document* and *outputExt*.

        Raises DocumentConversionException for an extension missing from
        FILTER_MAP or a family that has no filter for that extension.
        """
        family = self._detectFamily(document)
        try:
            filterByFamily = FILTER_MAP[outputExt]
        except KeyError:
            raise DocumentConversionException(
                "unknown output format: '%s'" % outputExt)
        try:
            return filterByFamily[family]
        except KeyError:
            raise DocumentConversionException(
                "unsupported conversion: from '%s' to '%s'" % (family, outputExt))

    def _detectFamily(self, document):
        """Return the FAMILY_* constant matching the services of *document*.

        Raises DocumentConversionException if none of the known document
        services is supported.
        """
        # Checked in this order; the first supported service wins.
        # NOTE: a GenericTextDocument is either a TextDocument, a WebDocument,
        # or a GlobalDocument but this further distinction doesn't seem to
        # matter for conversions
        serviceFamilies = (
            ("com.sun.star.text.GenericTextDocument", FAMILY_TEXT),
            ("com.sun.star.sheet.SpreadsheetDocument", FAMILY_SPREADSHEET),
            ("com.sun.star.presentation.PresentationDocument", FAMILY_PRESENTATION),
            ("com.sun.star.drawing.DrawingDocument", FAMILY_DRAWING),
        )
        for service, family in serviceFamilies:
            if document.supportsService(service):
                return family
        raise DocumentConversionException("unknown document family: %s" % document)

    def _getFileExt(self, path):
        """Return the lower-cased extension of *path* without the dot.

        The result is an empty string when *path* has no extension.
        """
        # splitext() always returns a string for the extension part (possibly
        # empty), so no None check is needed before slicing off the dot.
        return splitext(path)[1][1:].lower()

    def _toFileUrl(self, path):
        """Return the file URL for the absolute form of *path*."""
        return uno.systemPathToFileUrl(abspath(path))

    def _toProperties(self, **args):
        """Return a tuple of PropertyValue objects, one per keyword argument."""
        props = []
        for name, value in args.items():
            prop = PropertyValue()
            prop.Name = name
            prop.Value = value
            props.append(prop)
        return tuple(props)
|
bshanks@8 | 130
|
bshanks@8 | 131
|
# Command-line entry point: convert argv[1] into argv[2].
# Exit status is 255 for a usage error and 1 for a missing input file or a
# failed conversion.
if __name__ == "__main__":
    from sys import argv, exc_info, exit

    if len(argv) < 3:
        print("USAGE: python %s <input-file> <output-file>" % argv[0])
        exit(255)
    if not isfile(argv[1]):
        print("no such input file: %s" % argv[1])
        exit(1)

    try:
        converter = DocumentConverter()
        converter.convert(argv[1], argv[2])
    except DocumentConversionException:
        # exc_info() is used instead of binding the exception in the except
        # clause so the handler is valid syntax on both Python 2 and Python 3.
        print("ERROR! " + str(exc_info()[1]))
        exit(1)
    except ErrorCodeIOException:
        print("ERROR! ErrorCodeIOException %d" % exc_info()[1].ErrCode)
        exit(1)
|
bshanks@8 | 151
|