cg

changeset 8:3bc61ab8e776
.
author: bshanks@bshanks.dyndns.org
date: Sat Apr 11 21:31:27 2009 -0700 (16 years ago)
parents: 075618f574d8
children: 3480ab8239f5
files: DocumentConverter.py grant.html grant.odt grant.txt
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/DocumentConverter.py	Sat Apr 11 21:31:27 2009 -0700
@@ -0,0 +1,151 @@
+#!/usr/bin/python
+#
+# PyODConverter (Python OpenDocument Converter) v1.0.0 - 2008-05-05
+#
+# This script converts a document from one office format to another by
+# connecting to an OpenOffice.org instance via Python-UNO bridge.
+#
+# Copyright (C) 2008 Mirko Nasato <mirko@artofsolving.com>
+# Licensed under the GNU LGPL v2.1 - http://www.gnu.org/licenses/lgpl-2.1.html
+# - or any later version.
+#
+# Default port on localhost that DocumentConverter connects to when no
+# explicit port is passed to its constructor.
+DEFAULT_OPENOFFICE_PORT = 8100
+
+import uno
+from os.path import abspath, isfile, splitext
+from com.sun.star.beans import PropertyValue
+from com.sun.star.task import ErrorCodeIOException
+from com.sun.star.connection import NoConnectException
+
+# Document families; DocumentConverter._detectFamily() returns one of these.
+FAMILY_TEXT = "Text"
+FAMILY_SPREADSHEET = "Spreadsheet"
+FAMILY_PRESENTATION = "Presentation"
+FAMILY_DRAWING = "Drawing"
+
+# Export filter names, keyed first by output file extension and then by
+# document family.  A family that is absent under an extension is reported
+# as an unsupported conversion.
+# see http://wiki.services.openoffice.org/wiki/Framework/Article/Filter
+# for more available filters
+FILTER_MAP = {
+    "pdf": {
+        FAMILY_TEXT: "writer_pdf_Export",
+        FAMILY_SPREADSHEET: "calc_pdf_Export",
+        FAMILY_PRESENTATION: "impress_pdf_Export",
+        FAMILY_DRAWING: "draw_pdf_Export",
+    },
+    "html": {
+        FAMILY_TEXT: "HTML (StarWriter)",
+        FAMILY_SPREADSHEET: "HTML (StarCalc)",
+        FAMILY_PRESENTATION: "impress_html_Export",
+    },
+    "odt": {
+        FAMILY_TEXT: "writer8",
+    },
+    "doc": {
+        FAMILY_TEXT: "MS Word 97",
+    },
+    "rtf": {
+        FAMILY_TEXT: "Rich Text Format",
+    },
+    "txt": {
+        FAMILY_TEXT: "Text",
+    },
+    "ods": {
+        FAMILY_SPREADSHEET: "calc8",
+    },
+    "xls": {
+        FAMILY_SPREADSHEET: "MS Excel 97",
+    },
+    "odp": {
+        FAMILY_PRESENTATION: "impress8",
+    },
+    "ppt": {
+        FAMILY_PRESENTATION: "MS PowerPoint 97",
+    },
+    "swf": {
+        FAMILY_PRESENTATION: "impress_flash_Export",
+    },
+}
+
+
+class DocumentConversionException(Exception):
+    """Raised when a document cannot be converted.
+
+    The human-readable reason is kept in the `message` attribute and is what
+    str() returns for the exception.
+    """
+
+    def __init__(self, message):
+        # Initialise the base class as well, so that the standard `args`
+        # attribute carries the message alongside `message` itself.
+        Exception.__init__(self, message)
+        self.message = message
+
+    def __str__(self):
+        return self.message
+
+
+class DocumentConverter:
+    """Convert documents between office formats via a running OpenOffice.org.
+
+    The constructor connects to an OpenOffice.org instance on localhost
+    through the Python-UNO bridge; convert() then loads a file and stores it
+    using the export filter selected by the output file's extension.
+    """
+
+    def __init__(self, port=DEFAULT_OPENOFFICE_PORT):
+        """Connect to OpenOffice.org on localhost and obtain its Desktop service.
+
+        port -- port of the UNO socket to connect to.
+
+        Raises DocumentConversionException if no connection can be
+        established (NoConnectException from the resolver).
+        """
+        localContext = uno.getComponentContext()
+        resolver = localContext.ServiceManager.createInstanceWithContext("com.sun.star.bridge.UnoUrlResolver", localContext)
+        try:
+            context = resolver.resolve("uno:socket,host=localhost,port=%s;urp;StarOffice.ComponentContext" % port)
+        except NoConnectException:
+            raise DocumentConversionException("failed to connect to OpenOffice.org on port %s" % port)
+        self.desktop = context.ServiceManager.createInstanceWithContext("com.sun.star.frame.Desktop", context)
+
+    def convert(self, inputFile, outputFile):
+        """Load inputFile as a hidden document and store it as outputFile.
+
+        The export filter is looked up in FILTER_MAP from the extension of
+        outputFile and the family of the loaded document.
+
+        Raises DocumentConversionException if the document cannot be loaded,
+        the output extension is unknown, or the document family cannot be
+        stored in that format.  Exceptions raised by the UNO calls
+        themselves are not caught here and propagate to the caller.
+        """
+        inputUrl = self._toFileUrl(inputFile)
+        outputUrl = self._toFileUrl(outputFile)
+
+        document = self.desktop.loadComponentFromURL(inputUrl, "_blank", 0, self._toProperties(Hidden=True))
+        if document is None:
+            # Without this check a failed load would only show up later as
+            # an unrelated AttributeError on None.
+            raise DocumentConversionException("failed to load document: %s" % inputFile)
+
+        # From here on the document is open inside OpenOffice.org, so close
+        # it on every exit path, including a failed filter lookup.
+        try:
+            try:
+                document.refresh()
+            except AttributeError:
+                # best effort; presumably for documents without refresh()
+                pass
+
+            outputExt = self._getFileExt(outputFile)
+            filterName = self._filterName(document, outputExt)
+            document.storeToURL(outputUrl, self._toProperties(FilterName=filterName))
+        finally:
+            document.close(True)
+
+    def _filterName(self, document, outputExt):
+        """Return the export filter name for storing document as outputExt.
+
+        Raises DocumentConversionException if outputExt is not a key of
+        FILTER_MAP, or if FILTER_MAP has no filter for the document's family
+        under that extension.
+        """
+        family = self._detectFamily(document)
+        try:
+            filterByFamily = FILTER_MAP[outputExt]
+        except KeyError:
+            raise DocumentConversionException("unknown output format: '%s'" % outputExt)
+        try:
+            return filterByFamily[family]
+        except KeyError:
+            raise DocumentConversionException("unsupported conversion: from '%s' to '%s'" % (family, outputExt))
+
+    def _detectFamily(self, document):
+        """Return the FAMILY_* constant matching the services document supports.
+
+        Raises DocumentConversionException if none of the known document
+        services is supported.
+        """
+        if document.supportsService("com.sun.star.text.GenericTextDocument"):
+            # NOTE: a GenericTextDocument is either a TextDocument, a WebDocument, or a GlobalDocument
+            # but this further distinction doesn't seem to matter for conversions
+            return FAMILY_TEXT
+        if document.supportsService("com.sun.star.sheet.SpreadsheetDocument"):
+            return FAMILY_SPREADSHEET
+        if document.supportsService("com.sun.star.presentation.PresentationDocument"):
+            return FAMILY_PRESENTATION
+        if document.supportsService("com.sun.star.drawing.DrawingDocument"):
+            return FAMILY_DRAWING
+        raise DocumentConversionException("unknown document family: %s" % document)
+
+    def _getFileExt(self, path):
+        """Return the lower-cased extension of path without the leading dot.
+
+        Returns an empty string when path has no extension.
+        """
+        # splitext() always yields a string (possibly empty) for the
+        # extension, so no None check is needed.
+        return splitext(path)[1][1:].lower()
+
+    def _toFileUrl(self, path):
+        """Return the file URL for the absolute form of the system path."""
+        return uno.systemPathToFileUrl(abspath(path))
+
+    def _toProperties(self, **args):
+        """Return a tuple of PropertyValue objects, one per keyword argument."""
+        props = []
+        for key in args:
+            prop = PropertyValue()
+            prop.Name = key
+            prop.Value = args[key]
+            props.append(prop)
+        return tuple(props)
+
+
+if __name__ == "__main__":
+    # Command-line entry point: convert <input-file> into <output-file>.
+    # Exit status is 255 on wrong usage and 1 on any reported failure.
+    from sys import argv, exit
+
+    if len(argv) < 3:
+        print "USAGE: python %s <input-file> <output-file>" % argv[0]
+        exit(255)
+    if not isfile(argv[1]):
+        print "no such input file: %s" % argv[1]
+        exit(1)
+
+    try:
+        converter = DocumentConverter()
+        converter.convert(argv[1], argv[2])
+    except DocumentConversionException, exception:
+        # separate the "ERROR!" prefix from the reason, as the message below does
+        print "ERROR! " + str(exception)
+        exit(1)
+    except ErrorCodeIOException, exception:
+        print "ERROR! ErrorCodeIOException %d" % exception.ErrCode
+        exit(1)
+
--- a/grant.html	Sat Apr 11 19:59:01 2009 -0700
+++ b/grant.html	Sat Apr 11 21:31:27 2009 -0700
@@ -1,5 +1,6 @@
-            Massive new datasets obtained with techniques such as in situ hybridization
+            todo test3
+               Massive new datasets obtained with techniques such as in situ hybridization
@@ -34,10 +35,10 @@
+                                            1
+
-                                            1
-
@@ -79,11 +80,11 @@
+                                            2
+
-                                            2
-
@@ -121,10 +122,10 @@
+                                            3
+
-                                            3
-
@@ -164,11 +165,11 @@
+                                            4
+
-                                            4
-
@@ -199,9 +200,7 @@
-               The requirement to find combinations of only a small number of genes limits
-            us from straightforwardly applying many of the most simple techniques from
-__________________________
+_____________________
@@ -222,13 +221,8 @@
-                                                        
-                                                        
-            Figure 2: The top row shows the three genes which (individually) best predict
-            area AUD, according to logistic regression.  The bottom row shows the three
-            genes which (individually) best match area AUD, according to gradient similar-
-            ity. From left to right and top to bottom, the genes are Ssr1, Efcbp1, Aph1a,
-            Ptk7, Aph1a again, and Lepr
+               The requirement to find combinations of only a small number of genes limits
+            us from straightforwardly applying many of the most simple techniques from
@@ -246,16 +240,6 @@
-__________________________
-   4For each gene, a logistic regression in which the response variable was whether or not a
-surface pixel was within area AUD, and the predictor variable was the value of the expression
-of the gene underneath that pixel. The resulting scores were used to rank the genes in terms
-of how well they predict area AUD.
-    5For each gene the gradient similarity (see section ??) between (a) a map of the expression
-of each gene on the cortical surface and (b) the shape of area AUD, was calculated, and this
-was used to rank the genes.
-                                            7
-
@@ -273,6 +257,23 @@
+____________________
+   4For each gene, a logistic regression in which the response variable was whether or not a
+surface pixel was within area AUD, and the predictor variable was the value of the expression
+of the gene underneath that pixel. The resulting scores were used to rank the genes in terms
+of how well they predict area AUD.
+    5For each gene the gradient similarity (see section ??) between (a) a map of the expression
+of each gene on the cortical surface and (b) the shape of area AUD, was calculated, and this
+was used to rank the genes.
+                                            7
+
+                                                        
+                                                        
+            Figure 2: The top row shows the three genes which (individually) best predict
+            area AUD, according to logistic regression.  The bottom row shows the three
+            genes which (individually) best match area AUD, according to gradient similar-
+            ity. From left to right and top to bottom, the genes are Ssr1, Efcbp1, Aph1a,
+            Ptk7, Aph1a again, and Lepr
@@ -295,10 +296,10 @@
+                                            8
+
-                                            8
-
@@ -340,11 +341,11 @@
-    We expect to discover sets of marker genes that pick out specific cortical
-areas.  This will allow the development of drugs and other interventions that
-selectively target individual cortical areas.   Therefore our research will lead
+               We expect to discover sets of marker genes that pick out specific cortical
+            areas.  This will allow the development of drugs and other interventions that
+            selectively target individual cortical areas.   Therefore our research will lead
@@ -388,11 +389,11 @@
+                                            10
+
-                                            10
-
@@ -427,10 +428,10 @@
+                                            11
+
-                                            11
-
@@ -471,12 +472,12 @@
+                                            12
+
-                                            12
-
@@ -519,12 +520,12 @@
+                                            13
+
-                                            13
-
@@ -567,12 +568,12 @@
+                                            14
+
-                                            14
-
@@ -615,12 +616,12 @@
+                                            15
+
-                                            15
-
@@ -658,12 +659,12 @@
-Initiative), SEV (Allen Brain Institute Smoothed Energy Volume), and MAT-
-LAB. This ensures that our users will not have to exclusively rely on our tools
-when analyzing data. For example, users will be able to use the data visualiza-
-tion and analysis capabilities of MATLAB and Caret alongside our software.
+            Initiative), SEV (Allen Brain Institute Smoothed Energy Volume), and MAT-
+            LAB. This ensures that our users will not have to exclusively rely on our tools
+            when analyzing data. For example, users will be able to use the data visualiza-
+            tion and analysis capabilities of MATLAB and Caret alongside our software.
@@ -705,13 +706,13 @@
+                                            17
+
-                                            17
-
@@ -751,12 +752,12 @@
+                                            18
+
-                                            18
-
--- a/grant.txt	Sat Apr 11 19:59:01 2009 -0700
+++ b/grant.txt	Sat Apr 11 21:31:27 2009 -0700
@@ -1,5 +1,7 @@
+todo test3
+
author	bshanks@bshanks.dyndns.org
date	Sat Apr 11 21:31:27 2009 -0700 (16 years ago)
parents	075618f574d8
children	3480ab8239f5
files	DocumentConverter.py grant.html grant.odt grant.txt