View Javadoc

1   /**
2    * This file Copyright (c) 2003-2010 Magnolia International
3    * Ltd.  (http://www.magnolia-cms.com). All rights reserved.
4    *
5    *
6    * This file is dual-licensed under both the Magnolia
7    * Network Agreement and the GNU General Public License.
8    * You may elect to use one or the other of these licenses.
9    *
10   * This file is distributed in the hope that it will be
11   * useful, but AS-IS and WITHOUT ANY WARRANTY; without even the
12   * implied warranty of MERCHANTABILITY or FITNESS FOR A
13   * PARTICULAR PURPOSE, TITLE, or NONINFRINGEMENT.
14   * Redistribution, except as permitted by whichever of the GPL
15   * or MNA you select, is prohibited.
16   *
17   * 1. For the GPL license (GPL), you can redistribute and/or
18   * modify this file under the terms of the GNU General
19   * Public License, Version 3, as published by the Free Software
20   * Foundation.  You should have received a copy of the GNU
21   * General Public License, Version 3 along with this program;
22   * if not, write to the Free Software Foundation, Inc., 51
23   * Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
24   *
25   * 2. For the Magnolia Network Agreement (MNA), this file
26   * and the accompanying materials are made available under the
27   * terms of the MNA which accompanies this distribution, and
28   * is available at http://www.magnolia-cms.com/mna.html
29   *
30   * Any modifications to this file must keep this entire header
31   * intact.
32   *
33   */
34  package info.magnolia.importexport;
35  
36  import info.magnolia.cms.beans.runtime.Document;
37  import info.magnolia.cms.core.Content;
38  import info.magnolia.cms.core.HierarchyManager;
39  import info.magnolia.cms.core.ItemType;
40  import info.magnolia.cms.core.SystemProperty;
41  import info.magnolia.cms.util.ContentUtil;
42  import info.magnolia.cms.util.NodeDataUtil;
43  import info.magnolia.context.MgnlContext;
44  import info.magnolia.importexport.filters.ImportXmlRootFilter;
45  import info.magnolia.importexport.filters.MagnoliaV2Filter;
46  import info.magnolia.importexport.filters.MetadataUuidFilter;
47  import info.magnolia.importexport.filters.RemoveMixversionableFilter;
48  import info.magnolia.importexport.filters.VersionFilter;
49  
50  import java.io.File;
51  import java.io.FileInputStream;
52  import java.io.FileNotFoundException;
53  import java.io.FileOutputStream;
54  import java.io.IOException;
55  import java.io.InputStream;
56  import java.io.OutputStream;
57  import java.io.UnsupportedEncodingException;
58  import java.net.URLDecoder;
59  import java.net.URLEncoder;
60  import java.text.MessageFormat;
61  import java.util.Iterator;
62  import java.util.Properties;
63  import java.util.zip.DeflaterOutputStream;
64  import java.util.zip.GZIPInputStream;
65  import java.util.zip.GZIPOutputStream;
66  import java.util.zip.ZipInputStream;
67  import java.util.zip.ZipOutputStream;
68  
69  import javax.jcr.ImportUUIDBehavior;
70  import javax.jcr.Node;
71  import javax.jcr.PathNotFoundException;
72  import javax.jcr.RepositoryException;
73  import javax.jcr.Session;
74  import javax.jcr.Workspace;
75  import javax.xml.transform.Source;
76  import javax.xml.transform.sax.SAXTransformerFactory;
77  import javax.xml.transform.stream.StreamSource;
78  
79  import org.apache.commons.io.IOUtils;
80  import org.apache.commons.lang.StringUtils;
81  import org.apache.xml.serialize.OutputFormat;
82  import org.apache.xml.serialize.XMLSerializer;
83  import org.slf4j.Logger;
84  import org.slf4j.LoggerFactory;
85  import org.xml.sax.ContentHandler;
86  import org.xml.sax.InputSource;
87  import org.xml.sax.SAXException;
88  import org.xml.sax.XMLFilter;
89  import org.xml.sax.XMLReader;
90  import org.xml.sax.helpers.XMLReaderFactory;
91  
92  
93  /**
94   * imports and exports XML data
95   * @author <a href="mailto:niko@macnica.com">Nicolas Modrzyk</a>
96   * @author Oliver Lietz
97   */
98  public class DataTransporter {
99  
100     private static final int INDENT_VALUE = 2;
101 
102     private static Logger log = LoggerFactory.getLogger(DataTransporter.class.getName());
103 
104     final static int BOOTSTRAP_IMPORT_MODE = ImportUUIDBehavior.IMPORT_UUID_COLLISION_REPLACE_EXISTING;
105 
106     public static final String ZIP = ".zip";
107 
108     public static final String GZ = ".gz";
109 
110     public static final String XML = ".xml";
111 
112     public static final String PROPERTIES = ".properties";
113 
114     public static final String DOT = ".";
115 
116     public static final String SLASH = "/";
117     
118     public static final String UTF8 = "UTF-8";
119 
120     public static final String JCR_ROOT = "jcr:root";
121 
122     /**
123      * Document -> File
124      * @param xmlDocument uploaded file
125      * @param repositoryName selected repository
126      * @param basepath base path in repository
127      * @param keepVersionHistory if <code>false</code> version info will be stripped before importing the document
128      * @param importMode a valid value for ImportUUIDBehavior
129      * @param saveAfterImport
130      * @param createBasepathIfNotExist
131      * @throws IOException
132      * @see ImportUUIDBehavior
133      */
134     public static synchronized void importDocument(Document xmlDocument, String repositoryName, String basepath,
135                                                    boolean keepVersionHistory, int importMode, boolean saveAfterImport,
136                                                    boolean createBasepathIfNotExist)
137             throws IOException {
138         File xmlFile = xmlDocument.getFile();
139         importFile(xmlFile, repositoryName, basepath, keepVersionHistory, importMode, saveAfterImport,
140                 createBasepathIfNotExist);
141     }
142 
143     /**
144      * File -> InputStream
145      * @param xmlFile (zipped/gzipped) XML file to import
146      * @param repositoryName selected repository
147      * @param basepath base path in repository
148      * @param keepVersionHistory if <code>false</code> version info will be stripped before importing the document
149      * @param importMode a valid value for ImportUUIDBehavior
150      * @param saveAfterImport
151      * @param createBasepathIfNotExist
152      * @throws IOException
153      * @see ImportUUIDBehavior
154      */
155     public static synchronized void importFile(File xmlFile, String repositoryName, String basepath,
156                                                boolean keepVersionHistory, int importMode, boolean saveAfterImport,
157                                                boolean createBasepathIfNotExist)
158             throws IOException {
159         String name = xmlFile.getAbsolutePath();
160 
161         InputStream xmlStream = getInputStreamForFile(xmlFile);
162         importXmlStream(xmlStream, repositoryName, basepath, name, keepVersionHistory, importMode, saveAfterImport,
163                 createBasepathIfNotExist);
164     }
165 
166     /**
167      * @param xmlFile
168      * @param repositoryName
169      * @throws IOException
170      */
171     public static void executeBootstrapImport(File xmlFile, String repositoryName) throws IOException {
172         String filenameWithoutExt = StringUtils.substringBeforeLast(xmlFile.getName(), DOT);
173         if (filenameWithoutExt.endsWith(XML)) {
174             // if file ends in .xml.gz or .xml.zip
175             // need to keep the .xml to be able to view it after decompression
176             filenameWithoutExt = StringUtils.substringBeforeLast(xmlFile.getName(), DOT);
177         }
178         String pathName = StringUtils.substringAfter(StringUtils.substringBeforeLast(filenameWithoutExt, DOT), DOT);
179         
180         pathName = decodePath(pathName,  UTF8); 
181         
182         String basepath = SLASH + StringUtils.replace(pathName, DOT, SLASH);
183 
184         if (xmlFile.getName().endsWith(PROPERTIES)) {
185             Properties properties = new Properties();
186             properties.load(new FileInputStream(xmlFile));
187             importProperties(properties, repositoryName);
188         } else {
189             DataTransporter.importFile(xmlFile, repositoryName, basepath, false, BOOTSTRAP_IMPORT_MODE, true, true);
190         }
191     }
192 
193     /**
194      * Overwrite or set single values
195      * @param properties
196      * @param repositoryName
197      * @deprecated since 4.0 - use the PropertiesImportExport class instead.
198      */
199     public static void importProperties(Properties properties, String repositoryName) {
200         for (Iterator iter = properties.keySet().iterator(); iter.hasNext();) {
201             String key = (String) iter.next();
202             String value = (String) properties.get(key);
203 
204             String name = StringUtils.substringAfterLast(key, "."); //$NON-NLS-1$
205             String path = StringUtils.substringBeforeLast(key, ".").replace('.', '/'); //$NON-NLS-1$
206             Content node = ContentUtil.getContent(repositoryName, path);
207             if (node != null) {
208                 try {
209                     NodeDataUtil.getOrCreate(node, name).setValue(value);
210                     node.save();
211                 }
212                 catch (RepositoryException e) {
213                     log.error("can't set property " + key, e);
214                 }
215             }
216         }
217 
218     }
219 
220     /**
221      * imports XML stream into repository<p/>
222      * XML is filtered by <code>MagnoliaV2Filter</code>, <code>VersionFilter</code> and <code>ImportXmlRootFilter</code>
223      * if <code>keepVersionHistory</code> is set to <code>false</code>
224      * @param xmlStream XML stream to import
225      * @param repositoryName selected repository
226      * @param basepath base path in repository
227      * @param name (absolute path of <code>File</code>)
228      * @param keepVersionHistory if <code>false</code> version info will be stripped before importing the document
229      * @param importMode a valid value for ImportUUIDBehavior
230      * @param saveAfterImport
231      * @param createBasepathIfNotExist
232      * @throws IOException
233      * @see ImportUUIDBehavior
234      * @see ImportXmlRootFilter
235      * @see VersionFilter
236      * @see MagnoliaV2Filter
237      */
238     public static synchronized void importXmlStream(InputStream xmlStream, String repositoryName, String basepath,
239                                                     String name, boolean keepVersionHistory, int importMode,
240                                                     boolean saveAfterImport, boolean createBasepathIfNotExist)
241             throws IOException {
242 
243         // TODO hopefully this will be fixed with a more useful message with the Bootstrapper refactoring
244         if (xmlStream == null) {
245             throw new IOException("Can't import a null stream into repository: " + repositoryName + ", basepath: " + basepath + ", name: " + name);
246         }
247 
248         HierarchyManager hm = MgnlContext.getHierarchyManager(repositoryName);
249         if (hm == null) {
250             throw new IllegalStateException("Can't import " + name + " since repository " + repositoryName + " does not exist.");
251         }
252         Workspace ws = hm.getWorkspace();
253 
254         if (log.isDebugEnabled()) {
255             log.debug("Importing content into repository: [{}] from: [{}] into path: [{}]", //$NON-NLS-1$
256                     new Object[]{repositoryName, name, basepath});
257         }
258 
259         if (!hm.isExist(basepath) && createBasepathIfNotExist) {
260             try {
261                 ContentUtil.createPath(hm, basepath, ItemType.CONTENT);
262             }
263             catch (RepositoryException e) {
264                 log.error("can't create path [{}]", basepath); //$NON-NLS-1$
265             }
266         }
267 
268         Session session = ws.getSession();
269 
270         try {
271             if (keepVersionHistory) {
272                 // do not manipulate
273                 session.importXML(basepath, xmlStream, importMode);
274             } else {
275                 // create readers/filters and chain
276                 XMLReader initialReader = XMLReaderFactory.createXMLReader(org.apache.xerces.parsers.SAXParser.class.getName());
277 
278                 XMLFilter magnoliaV2Filter = null;
279 
280                 // if stream is from regular file, test for belonging XSL file to apply XSL transformation to XML
281                 if (new File(name).isFile()) {
282                     InputStream xslStream = getXslStreamForXmlFile(new File(name));
283                     if (xslStream != null) {
284                         Source xslSource = new StreamSource(xslStream);
285                         SAXTransformerFactory saxTransformerFactory = (SAXTransformerFactory) SAXTransformerFactory.newInstance();
286                         XMLFilter xslFilter = saxTransformerFactory.newXMLFilter(xslSource);
287                         magnoliaV2Filter = new MagnoliaV2Filter(xslFilter);
288                     }
289                 }
290 
291                 if (magnoliaV2Filter == null) {
292                     magnoliaV2Filter = new MagnoliaV2Filter(initialReader);
293                 }
294 
295                 XMLFilter versionFilter = new VersionFilter(magnoliaV2Filter);
296 
297                 // enable this to strip useless "name" properties from dialogs
298                 // versionFilter = new UselessNameFilter(versionFilter);
299 
300                 // enable this to strip mix:versionable from pre 3.6 xml files
301                 versionFilter = new RemoveMixversionableFilter(versionFilter);
302 
303                 XMLReader finalReader = new ImportXmlRootFilter(versionFilter);
304 
305                 ContentHandler handler = session.getImportContentHandler(basepath, importMode);
306                 finalReader.setContentHandler(handler);
307 
308                 // parse XML, import is done by handler from session
309                 try {
310                     finalReader.parse(new InputSource(xmlStream));
311                 }
312                 finally {
313                     IOUtils.closeQuietly(xmlStream);
314                 }
315 
316                 if (((ImportXmlRootFilter) finalReader).rootNodeFound) {
317                     String path = basepath;
318                     if (!path.endsWith(SLASH)) {
319                         path += SLASH;
320                     }
321 
322                     Node dummyRoot = (Node) session.getItem(path + JCR_ROOT);
323                     for (Iterator iter = dummyRoot.getNodes(); iter.hasNext();) {
324                         Node child = (Node) iter.next();
325                         // move childs to real root
326 
327                         if (session.itemExists(path + child.getName())) {
328                             session.getItem(path + child.getName()).remove();
329                         }
330 
331                         session.move(child.getPath(), path + child.getName());
332                     }
333                     // delete the dummy node
334                     dummyRoot.remove();
335                 }
336             }
337         }
338         catch (Exception e) {
339             throw new RuntimeException("Error importing " + name + ": " + e.getMessage(), e);
340         }
341         finally {
342             IOUtils.closeQuietly(xmlStream);
343         }
344 
345         try {
346             if (saveAfterImport) {
347                 session.save();
348             }
349         }
350         catch (RepositoryException e) {
351             log.error(MessageFormat.format(
352                     "Unable to save changes to the [{0}] repository due to a {1} Exception: {2}.", //$NON-NLS-1$
353                     new Object[]{repositoryName, e.getClass().getName(), e.getMessage()}), e);
354             throw new IOException(e.getMessage());
355         }
356     }
357 
358     /**
359      * @param file
360      * @return XSL stream for Xml file or <code>null</code>
361      */
362     protected static InputStream getXslStreamForXmlFile(File file) {
363         InputStream xslStream = null;
364         String xlsFilename = StringUtils.substringBeforeLast(file.getAbsolutePath(), ".") + ".xsl"; //$NON-NLS-1$
365         File xslFile = new File(xlsFilename);
366         if (xslFile.exists()) {
367             try {
368                 xslStream = new FileInputStream(xslFile);
369                 log.info("XSL file for [" + file.getName() + "] found (" + xslFile.getName() + ")"); //$NON-NLS-1$ //$NON-NLS-2$ //$NON-NLS-3$
370             } catch (FileNotFoundException e) { // should never happen (xslFile.exists())
371                 e.printStackTrace();
372             }
373         }
374         return xslStream;
375     }
376 
377     /**
378      * creates a stream from the (zipped/gzipped) XML file
379      * @param xmlFile
380      * @return stream of the file
381      * @throws IOException
382      */
383     private static InputStream getInputStreamForFile(File xmlFile) throws IOException {
384         InputStream xmlStream;
385         // looks like the zip one is buggy. It throws exception when trying to use it
386         if (xmlFile.getName().endsWith(ZIP)) {
387             xmlStream = new ZipInputStream((new FileInputStream(xmlFile)));
388         } else if (xmlFile.getName().endsWith(GZ)) {
389             xmlStream = new GZIPInputStream((new FileInputStream(xmlFile)));
390         } else { // if(fileName.endsWith(XML))
391             xmlStream = new FileInputStream(xmlFile);
392         }
393         return xmlStream;
394     }
395 
396     public static void executeExport(OutputStream baseOutputStream, boolean keepVersionHistory, boolean format,
397                                      Session session, String basepath, String repository, String ext) throws IOException {
398         OutputStream outputStream = baseOutputStream;
399         if (ext.endsWith(ZIP)) {
400             outputStream = new ZipOutputStream(baseOutputStream);
401         } else if (ext.endsWith(GZ)) {
402             outputStream = new GZIPOutputStream(baseOutputStream);
403         }
404 
405         try {
406             if (keepVersionHistory) {
407                 // use exportSystemView in order to preserve property types
408                 // http://issues.apache.org/jira/browse/JCR-115
409                 if (!format) {
410                     session.exportSystemView(basepath, outputStream, false, false);
411                 } else {
412                     parseAndFormat(outputStream, null, repository, basepath, session, false);
413                 }
414             } else {
415                 // use XMLSerializer and a SAXFilter in order to rewrite the
416                 // file
417                 XMLReader reader = new VersionFilter(XMLReaderFactory
418                         .createXMLReader(org.apache.xerces.parsers.SAXParser.class.getName()));
419                 parseAndFormat(outputStream, reader, repository, basepath, session, false);
420             }
421         }
422         catch (IOException e) {
423             throw new RuntimeException(e);
424         }
425         catch (SAXException e) {
426             throw new RuntimeException(e);
427         }
428         catch (RepositoryException e) {
429             throw new RuntimeException(e);
430         }
431 
432         // finish the stream properly if zip stream
433         // this is not done by the IOUtils
434         if (outputStream instanceof DeflaterOutputStream) {
435             ((DeflaterOutputStream) outputStream).finish();
436         }
437 
438         baseOutputStream.flush();
439         IOUtils.closeQuietly(baseOutputStream);
440     }
441 
442     /**
443      * This export the content of the repository, and format it if necessary
444      * @param stream the stream to write the content to
445      * @param reader the reader to use to parse the xml content (so that we can perform filtering), if null instanciate
446      * a default one
447      * @param repository the repository to export
448      * @param basepath the basepath in the repository
449      * @param session the session to use to export the data from the repository
450      * @param noRecurse
451      * @throws IOException
452      * @throws SAXException
453      * @throws RepositoryException
454      * @throws PathNotFoundException
455      */
456     public static void parseAndFormat(OutputStream stream, XMLReader reader, String repository, String basepath,
457                                       Session session, boolean noRecurse)
458             throws IOException, SAXException, PathNotFoundException, RepositoryException {
459 
460         if (reader == null) {
461             reader = XMLReaderFactory.createXMLReader(org.apache.xerces.parsers.SAXParser.class.getName());
462         }
463 
464         // write to a temp file and then re-read it to remove version history
465         File tempFile = File.createTempFile("export-" + repository + session.getUserID(), ".xml"); //$NON-NLS-1$ //$NON-NLS-2$
466         OutputStream fileStream = new FileOutputStream(tempFile);
467 
468         try {
469             session.exportSystemView(basepath, fileStream, false, noRecurse);
470         }
471         finally {
472             IOUtils.closeQuietly(fileStream);
473         }
474 
475         readFormatted(reader, tempFile, stream);
476 
477         if (!tempFile.delete()) {
478             log.warn("Could not delete temporary export file {}", tempFile.getAbsolutePath()); //$NON-NLS-1$
479         }
480     }
481 
482     /**
483      * @param reader
484      * @param inputFile
485      * @param outputStream
486      * @throws FileNotFoundException
487      * @throws IOException
488      * @throws SAXException
489      */
490     protected static void readFormatted(XMLReader reader, File inputFile, OutputStream outputStream)
491             throws FileNotFoundException, IOException, SAXException {
492         InputStream fileInputStream = new FileInputStream(inputFile);
493         readFormatted(reader, fileInputStream, outputStream);
494         IOUtils.closeQuietly(fileInputStream);
495     }
496 
497     /**
498      * @param reader
499      * @param inputStream
500      * @param outputStream
501      * @throws FileNotFoundException
502      * @throws IOException
503      * @throws SAXException
504      */
505     protected static void readFormatted(XMLReader reader, InputStream inputStream, OutputStream outputStream)
506             throws FileNotFoundException, IOException, SAXException {
507 
508         OutputFormat outputFormat = new OutputFormat();
509 
510         outputFormat.setPreserveSpace(false); // this is ok, doesn't affect text nodes??
511         outputFormat.setIndenting(true);
512         outputFormat.setIndent(INDENT_VALUE);
513         outputFormat.setLineWidth(120); // need to be set after setIndenting()!
514 
515         MetadataUuidFilter metadataUuidFilter = new MetadataUuidFilter(reader, !SystemProperty
516             .getBooleanProperty("magnolia.export.keep_extra_namespaces")); // MAGNOLIA-1650
517         metadataUuidFilter.setContentHandler(new XMLSerializer(outputStream, outputFormat));
518         metadataUuidFilter.parse(new InputSource(inputStream));
519 
520         IOUtils.closeQuietly(inputStream);
521     }
522     
523     /**
524      * 
525      * @param path path to encode
526      * @param separator "." (dot) or "/", it will be not encoded if found 
527      * @param enc charset 
528      * @return the path encoded
529      */
530     public static String encodePath(String path, String separator, String enc)
531     {
532         String pathEncoded = StringUtils.EMPTY; 
533         try
534         {
535             if (!StringUtils.contains(path, separator))
536             {
537                 return URLEncoder.encode(path, enc);
538             }
539             String[] tokens = StringUtils.split(path, separator);
540             for (int j = 0; j < tokens.length; j++)
541             {
542                 if (j == 0 && StringUtils.startsWith(path, separator))
543                 {
544                     pathEncoded += separator;
545                 }
546                 pathEncoded += URLEncoder.encode(tokens[j], enc);
547 
548                 if ((j == tokens.length - 1 && StringUtils.endsWith(path, separator)) || j < tokens.length - 1)
549                 {
550                     pathEncoded += separator;
551                 }
552 
553             }
554         }
555         catch (UnsupportedEncodingException e)
556         {
557             return path;
558         }
559         return pathEncoded;
560     }
561     
562     /**
563      * decode a path (ex. %D0%9D%D0%B0.%B2%D0%BE%D0%BB%D0%BD)
564      * @param path path to decode
565      * @param enc charset
566      * @return the path decoded
567      */
568     public static String decodePath(String path, String enc)
569     {
570         String pathEncoded = StringUtils.EMPTY;
571         try
572         {
573             pathEncoded = URLDecoder.decode(path, enc);
574         }
575         catch (UnsupportedEncodingException e)
576         {
577             return path;
578         }
579         return pathEncoded;
580     }
581 
582 
583 }