Tuesday, March 8, 2011

Extract images from Word & OpenOffice documents with C# and the OpenOffice UNO SDK

If you don't know how to set up the development environment for UNO to work properly, please read this post.

using System;
using System.IO;
using System.Runtime.InteropServices;
using Microsoft.Win32;
using unoidl.com.sun.star.beans;
using unoidl.com.sun.star.container;
using unoidl.com.sun.star.frame;
using unoidl.com.sun.star.graphic;
using unoidl.com.sun.star.lang;
using unoidl.com.sun.star.text;
using unoidl.com.sun.star.uno;

namespace OOExtractTable
{
    class Program
    {
        /// <summary>
        /// Prepares the current process for bootstrapping OpenOffice through UNO:
        /// appends the URE "bin" directory to PATH and sets the "UNO_PATH"
        /// environment variable, both taken from the OpenOffice registry entries.
        /// </summary>
        /// <exception cref="InvalidOperationException">
        /// A required OpenOffice registry key or value could not be found
        /// under either HKEY_CURRENT_USER or HKEY_LOCAL_MACHINE.
        /// </exception>
        public static void InitEnvironment()
        {
            // OpenOffice being a 32 bit app, its registry location is different in a 64 bit OS.
            // IntPtr.Size is 8 exactly when this process runs as 64 bit.
            string baseKey = IntPtr.Size == 8
                ? @"SOFTWARE\Wow6432Node\OpenOffice.org\"
                : @"SOFTWARE\OpenOffice.org\";

            // Get the URE directory
            string ureInstallPath = ReadOpenOfficeRegistryValue(baseKey + @"Layers\URE\1", "UREINSTALLLOCATION");
            string urePath = Path.Combine(ureInstallPath, "bin");

            // Get the UNO Path (stored as the key's default value, hence the null value name)
            string unoPath = ReadOpenOfficeRegistryValue(baseKey + @"UNO\InstallPath", null);

            // Avoid a leading ';' when PATH is not set at all.
            string currentPath = Environment.GetEnvironmentVariable("PATH");
            string path = string.IsNullOrEmpty(currentPath)
                ? urePath
                : string.Format("{0};{1}", currentPath, urePath);

            Environment.SetEnvironmentVariable("PATH", path);
            Environment.SetEnvironmentVariable("UNO_PATH", unoPath);
        }

        /// <summary>
        /// Reads a string value from the given sub key, looking in HKEY_CURRENT_USER first
        /// and falling back to HKEY_LOCAL_MACHINE. Fails with a descriptive exception
        /// instead of a NullReferenceException when the key or value is missing.
        /// </summary>
        private static string ReadOpenOfficeRegistryValue(string key, string valueName)
        {
            RegistryKey reg = Registry.CurrentUser.OpenSubKey(key);
            if (reg == null) reg = Registry.LocalMachine.OpenSubKey(key);
            if (reg == null)
                throw new InvalidOperationException("OpenOffice registry key not found: " + key);

            // 'using' closes the key even when reading the value throws.
            using (reg)
            {
                string value = reg.GetValue(valueName) as string;
                if (string.IsNullOrEmpty(value))
                    throw new InvalidOperationException(
                        "OpenOffice registry value '" + (valueName ?? "(default)") + "' not found under: " + key);

                return value;
            }
        }
        
        /// <summary>
        /// Bootstraps the UNO component context and returns its service manager
        /// as an <c>XMultiServiceFactory</c>.
        /// </summary>
        /// <returns>The service manager of the bootstrapped context; never null.</returns>
        /// <exception cref="System.Exception">
        /// Bootstrapping threw (the original exception is available as InnerException),
        /// no context was returned, or the service manager does not implement XMultiServiceFactory.
        /// </exception>
        public static XMultiServiceFactory Bootstrap()
        {
            XComponentContext context;
            try
            {
                context = uno.util.Bootstrap.bootstrap();
            }
            catch (System.Exception exp)
            {
                // Pass the original exception along so its type and stack trace are not lost.
                throw new System.Exception("public static XMultiServiceFactory Bootstrap()-> " + exp.Message, exp);
            }

            if (context == null)
                throw new System.Exception("public static XMultiServiceFactory Bootstrap()-> OpenOffice failed to start.");

            // Fail here with a clear message rather than handing a null factory to the caller.
            XMultiServiceFactory factory = context.getServiceManager() as XMultiServiceFactory;
            if (factory == null)
                throw new System.Exception("public static XMultiServiceFactory Bootstrap()-> Service manager does not implement XMultiServiceFactory.");

            return factory;
        }

        /// <summary>
        /// Builds a "file:///" URL from a file path by switching every backslash
        /// to a forward slash and prefixing the scheme. No characters are escaped.
        /// </summary>
        /// <param name="file">File path to convert.</param>
        /// <returns>The path with forward slashes, prefixed with "file:///".</returns>
        public static string PathConverter(string file)
        {
            string forwardSlashed = file.Replace('\\', '/');
            return string.Concat("file:///", forwardSlashed);
        }

        /// <summary>
        /// Stores a graphic at the given URL through the graphic provider, using a
        /// MIME type derived from the file extension at the end of the URL.
        /// </summary>
        /// <param name="xGraphicProvider">Provider whose storeGraphic is called.</param>
        /// <param name="xGraphic">Graphic to store.</param>
        /// <param name="fileName">Target URL, passed as the "URL" property; must end in a file extension such as ".png".</param>
        /// <exception cref="ArgumentNullException">Any argument is null.</exception>
        /// <exception cref="ArgumentException"><paramref name="fileName"/> has no file extension.</exception>
        public static void ExportGraphic(XGraphicProvider xGraphicProvider, XGraphic xGraphic, string fileName)
        {
            if (xGraphicProvider == null)
                throw new ArgumentNullException("xGraphicProvider");

            if (xGraphic == null)
                throw new ArgumentNullException("xGraphic");

            if (fileName == null)
                throw new ArgumentNullException("fileName");

            PropertyValue[] properties =
                {
                    new PropertyValue() { Name = "URL", Value = new uno.Any(fileName) },
                    new PropertyValue() { Name = "MimeType", Value = new uno.Any(GetImageMimeType(fileName)) }
                };

            xGraphicProvider.storeGraphic(xGraphic, properties);
        }

        /// <summary>
        /// Derives an "image/..." MIME type from the extension of a file name or URL.
        /// </summary>
        private static string GetImageMimeType(string fileName)
        {
            // Work on the trimmed text only, so index and string always agree.
            string trimmed = fileName.Trim();
            int dot = trimmed.LastIndexOf('.');
            int separator = trimmed.LastIndexOfAny(new char[] { '/', '\\' });

            // A dot before the last separator belongs to a directory or host, not to the file.
            if (dot <= separator || dot == trimmed.Length - 1)
                throw new ArgumentException(
                    "Cannot determine the image type: '" + fileName + "' has no file extension.", "fileName");

            string extension = trimmed.Substring(dot + 1).ToLowerInvariant();
            switch (extension)
            {
                // Common short extensions whose registered MIME subtype is spelled differently.
                case "jpg":
                    return "image/jpeg";
                case "tif":
                    return "image/tiff";
                default:
                    return "image/" + extension;
            }
        }

        /// <summary>
        /// Exports every graphic object of a text document to <paramref name="outDir"/>.
        /// Each graphic is written as a PNG file named with a fresh GUID.
        /// </summary>
        /// <param name="xGraphicProvider">Provider used to query and store the graphics.</param>
        /// <param name="xTextDocument">Document whose graphic objects are exported.</param>
        /// <param name="outDir">Directory the image files are written to.</param>
        /// <exception cref="ArgumentNullException">Any argument is null.</exception>
        /// <exception cref="ArgumentException">
        /// <paramref name="xTextDocument"/> does not implement XTextGraphicObjectsSupplier.
        /// </exception>
        /// <remarks>
        /// A graphic object that exposes no usable "GraphicURL" or cannot be resolved by the
        /// provider is reported on the console and skipped, so it does not abort the remaining exports.
        /// </remarks>
        public static void ExportEmbeddedGraphics(XGraphicProvider xGraphicProvider, XTextDocument xTextDocument, string outDir)
        {
            if (xGraphicProvider == null)
                throw new ArgumentNullException("xGraphicProvider");

            if (xTextDocument == null)
                throw new ArgumentNullException("xTextDocument");

            if (outDir == null)
                throw new ArgumentNullException("outDir");

            XTextGraphicObjectsSupplier xTGOSupplier = xTextDocument as XTextGraphicObjectsSupplier;
            if (xTGOSupplier == null)
                throw new ArgumentException("The document does not implement XTextGraphicObjectsSupplier.", "xTextDocument");

            XNameAccess nameAccess = xTGOSupplier.getGraphicObjects();
            if (nameAccess == null || !nameAccess.hasElements())
                return;

            foreach (string name in nameAccess.getElementNames())
            {
                // Look the object up once and query both interfaces from the same instance.
                object graphicObject = nameAccess.getByName(name).Value;

                XServiceInfo xsi = graphicObject as XServiceInfo;
                if (xsi == null ||
                    !xsi.supportsService("com.sun.star.text.TextContent") ||
                    !xsi.supportsService("com.sun.star.text.TextGraphicObject"))
                    continue;

                XPropertySet xProps = graphicObject as XPropertySet;
                if (xProps == null)
                {
                    Console.WriteLine("Skipping graphic '" + name + "': no property set available");
                    continue;
                }

                string url = xProps.getPropertyValue("GraphicURL").Value as string;
                if (string.IsNullOrEmpty(url))
                {
                    Console.WriteLine("Skipping graphic '" + name + "': no GraphicURL");
                    continue;
                }

                PropertyValue[] properties =
                    {
                        new PropertyValue() { Name = "URL", Value = new uno.Any(url) }
                    };

                XGraphic xGraphic = xGraphicProvider.queryGraphic(properties);
                if (xGraphic == null)
                {
                    Console.WriteLine("Skipping graphic '" + name + "': could not be loaded from '" + url + "'");
                    continue;
                }

                string fileName = Guid.NewGuid().ToString() + ".png";
                ExportGraphic(xGraphicProvider, xGraphic, PathConverter(Path.Combine(outDir, fileName)));
            }
        }

        /// <summary>
        /// Opens a document hidden in OpenOffice and exports its graphics to an output directory.
        /// </summary>
        /// <param name="args">
        /// Optional: args[0] is the document path and args[1] the output directory.
        /// When an argument is omitted the built-in sample path is used.
        /// </param>
        static void Main(string[] args)
        {
            string fileName = @"C:\code\_other\_MyPosts\OpenOffice_extract_images\table_pictures_test.doc";
            string outDir = @"C:\code\_other\_MyPosts\OpenOffice_extract_images\out\";

            // PathConverter only prefixes the scheme, so user-supplied paths are made absolute first.
            if (args != null && args.Length > 0) fileName = Path.GetFullPath(args[0]);
            if (args != null && args.Length > 1) outDir = Path.GetFullPath(args[1]);

            InitEnvironment();
            XMultiServiceFactory xMultiServiceFactory = Bootstrap();
            XComponentLoader xComponentLoader = xMultiServiceFactory.createInstance("com.sun.star.frame.Desktop") as XComponentLoader;
            if (xComponentLoader == null)
            {
                Console.WriteLine("Failed to start OpenOffice");
                return;
            }

            XGraphicProvider xGraphicProvider = xMultiServiceFactory.createInstance("com.sun.star.graphic.GraphicProvider") as XGraphicProvider;
            if (xGraphicProvider == null)
            {
                Console.WriteLine("Failed to get XGraphicProvider");
                return;
            }

            PropertyValue[] propertyValue =
            {
                new PropertyValue() { Name = "Hidden", Value = new uno.Any(true) },
                new PropertyValue() { Name = "CharacterSet", Value = new uno.Any("Unicode (UTF-8)") }
            };

            string path = PathConverter(fileName);
            XComponent xComponent = xComponentLoader.loadComponentFromURL(path, "_blank", 0, propertyValue);
            if (xComponent == null)
            {
                Console.WriteLine("Failed to open file: '" + fileName + "'");
                return;
            }

            try
            {
                XTextDocument xTextDocument = xComponent as XTextDocument;
                if (xTextDocument == null)
                {
                    Console.WriteLine("Not a text document: '" + fileName + "'");
                    return;
                }

                // The export writes files into outDir, so make sure the directory exists first.
                Directory.CreateDirectory(outDir);
                ExportEmbeddedGraphics(xGraphicProvider, xTextDocument, outDir);
            }
            finally
            {
                // Release the hidden document; otherwise it stays loaded in the OpenOffice process.
                xComponent.dispose();
            }
        }
    }
}