cg

annotate DocumentConverter.py @ 11:0a8047f28b9d

.
author bshanks@bshanks.dyndns.org
date Sat Apr 11 21:38:36 2009 -0700 (16 years ago)
parents
children

rev   line source
bshanks@8 1 #!/usr/bin/python
bshanks@8 2 #
bshanks@8 3 # PyODConverter (Python OpenDocument Converter) v1.0.0 - 2008-05-05
bshanks@8 4 #
bshanks@8 5 # This script converts a document from one office format to another by
bshanks@8 6 # connecting to an OpenOffice.org instance via Python-UNO bridge.
bshanks@8 7 #
bshanks@8 8 # Copyright (C) 2008 Mirko Nasato <mirko@artofsolving.com>
bshanks@8 9 # Licensed under the GNU LGPL v2.1 - http://www.gnu.org/licenses/lgpl-2.1.html
bshanks@8 10 # - or any later version.
bshanks@8 11 #
# Port used when DocumentConverter is constructed without an explicit `port`;
# it is substituted into the "uno:socket,host=localhost,port=..." URL.
DEFAULT_OPENOFFICE_PORT = 8100
bshanks@8 13
bshanks@8 14 import uno
bshanks@8 15 from os.path import abspath, isfile, splitext
bshanks@8 16 from com.sun.star.beans import PropertyValue
bshanks@8 17 from com.sun.star.task import ErrorCodeIOException
bshanks@8 18 from com.sun.star.connection import NoConnectException
bshanks@8 19
# Document family names; used as the inner keys of FILTER_MAP and returned
# by DocumentConverter._detectFamily.
FAMILY_TEXT, FAMILY_SPREADSHEET, FAMILY_PRESENTATION, FAMILY_DRAWING = (
    "Text",
    "Spreadsheet",
    "Presentation",
    "Drawing",
)

# Export filter lookup table:
#     output file extension -> {document family -> OpenOffice.org filter name}
# An extension missing here is an unknown output format; a family missing
# under an extension is an unsupported conversion.
#
# See http://wiki.services.openoffice.org/wiki/Framework/Article/Filter
# for more available filters.
FILTER_MAP = {
    # Formats available to several document families.
    "pdf": {
        FAMILY_TEXT: "writer_pdf_Export",
        FAMILY_SPREADSHEET: "calc_pdf_Export",
        FAMILY_PRESENTATION: "impress_pdf_Export",
        FAMILY_DRAWING: "draw_pdf_Export",
    },
    "html": {
        FAMILY_TEXT: "HTML (StarWriter)",
        FAMILY_SPREADSHEET: "HTML (StarCalc)",
        FAMILY_PRESENTATION: "impress_html_Export",
    },
    # Text document formats.
    "odt": {
        FAMILY_TEXT: "writer8",
    },
    "doc": {
        FAMILY_TEXT: "MS Word 97",
    },
    "rtf": {
        FAMILY_TEXT: "Rich Text Format",
    },
    "txt": {
        FAMILY_TEXT: "Text",
    },
    # Spreadsheet formats.
    "ods": {
        FAMILY_SPREADSHEET: "calc8",
    },
    "xls": {
        FAMILY_SPREADSHEET: "MS Excel 97",
    },
    # Presentation formats.
    "odp": {
        FAMILY_PRESENTATION: "impress8",
    },
    "ppt": {
        FAMILY_PRESENTATION: "MS PowerPoint 97",
    },
    "swf": {
        FAMILY_PRESENTATION: "impress_flash_Export",
    },
}
bshanks@8 49
bshanks@8 50
class DocumentConversionException(Exception):
    """Raised when a document cannot be converted.

    The human-readable reason is available both as ``str(exc)`` and as the
    ``message`` attribute.
    """

    def __init__(self, message):
        """Store *message* as the description of the failure."""
        # Initialise the base class so that ``args`` (and therefore repr()
        # and pickling) carry the message; on Python 2 ``args`` is only
        # populated by Exception.__init__.
        Exception.__init__(self, message)
        self.message = message

    def __str__(self):
        """Return the failure description passed to the constructor."""
        return self.message
bshanks@8 58
bshanks@8 59
class DocumentConverter:
    """Convert documents between office formats via a running OpenOffice.org.

    The constructor resolves a remote component context over a local socket
    and keeps a ``com.sun.star.frame.Desktop`` instance in ``self.desktop``;
    ``convert`` uses it to load the input file and store it through the
    export filter selected from ``FILTER_MAP``.
    """

    def __init__(self, port=DEFAULT_OPENOFFICE_PORT):
        """Connect to the OpenOffice.org instance at localhost:*port*.

        Raises:
            DocumentConversionException: if the UNO URL resolver raises
                ``NoConnectException``.
        """
        localContext = uno.getComponentContext()
        resolver = localContext.ServiceManager.createInstanceWithContext(
            "com.sun.star.bridge.UnoUrlResolver", localContext)
        try:
            context = resolver.resolve(
                "uno:socket,host=localhost,port=%s;urp;StarOffice.ComponentContext" % port)
        except NoConnectException:
            raise DocumentConversionException(
                "failed to connect to OpenOffice.org on port %s" % port)
        self.desktop = context.ServiceManager.createInstanceWithContext(
            "com.sun.star.frame.Desktop", context)

    def convert(self, inputFile, outputFile):
        """Convert *inputFile* and write the result to *outputFile*.

        The target format is taken from the extension of *outputFile*; the
        export filter additionally depends on the family of the loaded
        document. The document is loaded hidden and is always closed again,
        whether or not the conversion succeeds.

        Raises:
            DocumentConversionException: if the input cannot be loaded, the
                output extension is unknown, the document family is not
                recognised, or the family cannot be exported to the
                requested format.
        """
        inputUrl = self._toFileUrl(inputFile)
        outputUrl = self._toFileUrl(outputFile)

        document = self.desktop.loadComponentFromURL(
            inputUrl, "_blank", 0, self._toProperties(Hidden=True))
        # NOTE(review): the UNO API documents a null return when loading
        # fails; report that as a conversion error instead of letting an
        # AttributeError escape from the calls below.
        if document is None:
            raise DocumentConversionException(
                "failed to load input file: %s" % inputFile)

        # Everything after a successful load runs under try/finally so the
        # document is closed even when filter selection fails.
        try:
            try:
                document.refresh()
            except AttributeError:
                # Not every document type offers refresh(); skip it then.
                pass

            outputExt = self._getFileExt(outputFile)
            filterName = self._filterName(document, outputExt)
            document.storeToURL(outputUrl, self._toProperties(FilterName=filterName))
        finally:
            document.close(True)

    def _filterName(self, document, outputExt):
        """Return the export filter name for *document* and *outputExt*.

        Raises:
            DocumentConversionException: if *outputExt* is not a key of
                ``FILTER_MAP`` or has no filter for the document's family.
        """
        family = self._detectFamily(document)
        try:
            filterByFamily = FILTER_MAP[outputExt]
        except KeyError:
            raise DocumentConversionException(
                "unknown output format: '%s'" % outputExt)
        try:
            return filterByFamily[family]
        except KeyError:
            raise DocumentConversionException(
                "unsupported conversion: from '%s' to '%s'" % (family, outputExt))

    def _detectFamily(self, document):
        """Return the ``FAMILY_*`` constant matching the services of *document*.

        Raises:
            DocumentConversionException: if none of the known document
                services is supported.
        """
        # NOTE: a GenericTextDocument is either a TextDocument, a WebDocument,
        # or a GlobalDocument, but this further distinction doesn't seem to
        # matter for conversions.
        serviceToFamily = (
            ("com.sun.star.text.GenericTextDocument", FAMILY_TEXT),
            ("com.sun.star.sheet.SpreadsheetDocument", FAMILY_SPREADSHEET),
            ("com.sun.star.presentation.PresentationDocument", FAMILY_PRESENTATION),
            ("com.sun.star.drawing.DrawingDocument", FAMILY_DRAWING),
        )
        # Checked in order; the first supported service wins.
        for service, family in serviceToFamily:
            if document.supportsService(service):
                return family
        raise DocumentConversionException("unknown document family: %s" % document)

    def _getFileExt(self, path):
        """Return the lower-cased extension of *path* without the leading dot.

        A path without an extension yields the empty string.
        """
        # splitext() always returns strings, so no None check is needed.
        return splitext(path)[1][1:].lower()

    def _toFileUrl(self, path):
        """Return the file URL for the absolute form of *path*."""
        return uno.systemPathToFileUrl(abspath(path))

    def _toProperties(self, **args):
        """Return a tuple of ``PropertyValue`` objects, one per keyword argument."""
        props = []
        for name, value in args.items():
            prop = PropertyValue()
            prop.Name = name
            prop.Value = value
            props.append(prop)
        return tuple(props)
bshanks@8 130
bshanks@8 131
if __name__ == "__main__":
    # Command-line entry point:
    #     python DocumentConverter.py <input-file> <output-file>
    # Exit status: 255 on wrong usage, 1 on a missing input file or a failed
    # conversion.
    from sys import argv, exit

    # print(...) with a single argument and "except ... as ..." behave the
    # same on Python 2.6+ and Python 3.
    if len(argv) < 3:
        print("USAGE: python %s <input-file> <output-file>" % argv[0])
        exit(255)
    if not isfile(argv[1]):
        print("no such input file: %s" % argv[1])
        exit(1)

    try:
        converter = DocumentConverter()
        converter.convert(argv[1], argv[2])
    except DocumentConversionException as exception:
        # A space now separates the "ERROR!" prefix from the message.
        print("ERROR! " + str(exception))
        exit(1)
    except ErrorCodeIOException as exception:
        print("ERROR! ErrorCodeIOException %d" % exception.ErrCode)
        exit(1)
bshanks@8 150 exit(1)
bshanks@8 151