View Javadoc
1   package org.codehaus.plexus.util.xml;
2   
3   /*
4    * Copyright The Codehaus Foundation.
5    *
6    * Licensed under the Apache License, Version 2.0 (the "License");
7    * you may not use this file except in compliance with the License.
8    * You may obtain a copy of the License at
9    *
10   *     http://www.apache.org/licenses/LICENSE-2.0
11   *
12   * Unless required by applicable law or agreed to in writing, software
13   * distributed under the License is distributed on an "AS IS" BASIS,
14   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15   * See the License for the specific language governing permissions and
16   * limitations under the License.
17   */
18  
19  import java.io.File;
20  import java.io.IOException;
21  import java.io.InputStream;
22  import java.io.OutputStream;
23  import java.io.OutputStreamWriter;
24  import java.io.Reader;
25  import java.io.Writer;
26  
27  import org.codehaus.plexus.util.IOUtil;
28  import org.codehaus.plexus.util.ReaderFactory;
29  import org.codehaus.plexus.util.StringUtils;
30  import org.codehaus.plexus.util.WriterFactory;
31  import org.codehaus.plexus.util.xml.pull.MXParser;
32  import org.codehaus.plexus.util.xml.pull.XmlPullParser;
33  import org.codehaus.plexus.util.xml.pull.XmlPullParserException;
34  
35  /**
36   * Common XML utilities methods.
37   *
38   * @author <a href="mailto:vincent.siveton@gmail.com">Vincent Siveton</a>
39   * @version $Id$
40   * @since 1.5.7
41   */
42  public class XmlUtil
43  {
44      /** The default line indenter size i.e. 2. */
45      public static final int DEFAULT_INDENTATION_SIZE = 2;
46  
47      /** The default line separator ("\n" on UNIX) */
48      public static final String DEFAULT_LINE_SEPARATOR = System.getProperty( "line.separator" );
49  
50      /**
51       * Determines if a given File shall be handled as XML.
52       *
53       * @param f not null file
54       * @return <code>true</code> if the given file has XML content, <code>false</code> otherwise.
55       */
56      public static boolean isXml( File f )
57      {
58          if ( f == null )
59          {
60              throw new IllegalArgumentException( "f could not be null." );
61          }
62  
63          if ( !f.isFile() )
64          {
65              throw new IllegalArgumentException( "The file '" + f.getAbsolutePath() + "' is not a file." );
66          }
67  
68          Reader reader = null;
69          try
70          {
71              reader = ReaderFactory.newXmlReader( f );
72              XmlPullParser parser = new MXParser();
73              parser.setInput( reader );
74              parser.nextToken();
75              reader.close();
76              reader = null;
77              return true;
78          }
79          catch ( Exception e )
80          {
81              return false;
82          }
83          finally
84          {
85              IOUtil.close( reader );
86          }
87      }
88  
89      /**
90       * Pretty format the input reader. For instance, the following input:
91       * 
92       * <pre>
93       * &lt;div&gt;&lt;b&gt;content&lt;/b&gt;&lt;/div&gt;
94       * </pre>
95       * 
96       * becomes
97       * 
98       * <pre>
99       * &lt;div&gt;
100      *   &lt;b&gt;content&lt;/b&gt;
101      * &lt;/div&gt;
102      * </pre>
103      *
104      * @param reader not null
105      * @param writer not null
106      * @throws IOException if any or invalid xml content
107      * @see #prettyFormat(Reader, Writer, int, String)
108      * @see ReaderFactory to read an xml content
109      * @see WriterFactory to write an xml content
110      */
111     public static void prettyFormat( Reader reader, Writer writer )
112         throws IOException
113     {
114         prettyFormat( reader, writer, DEFAULT_INDENTATION_SIZE, DEFAULT_LINE_SEPARATOR );
115     }
116 
117     /**
118      * Pretty format the input reader. For instance, the following input:
119      * 
120      * <pre>
121      * &lt;div&gt;&lt;b&gt;content&lt;/b&gt;&lt;/div&gt;
122      * </pre>
123      * 
124      * becomes
125      * 
126      * <pre>
127      * &lt;div&gt;
128      *   &lt;b&gt;content&lt;/b&gt;
129      * &lt;/div&gt;
130      * </pre>
131      *
132      * @param reader not null
133      * @param writer not null
134      * @param indentSize positive number for the indentation
135      * @param lineSeparator the wanted line separator
136      * @throws IOException if any or invalid xml content
137      * @see ReaderFactory to read an xml content
138      * @see WriterFactory to write an xml content
139      */
140     public static void prettyFormat( Reader reader, Writer writer, int indentSize, String lineSeparator )
141         throws IOException
142     {
143         if ( reader == null )
144         {
145             throw new IllegalArgumentException( "The reader is null" );
146         }
147         if ( writer == null )
148         {
149             throw new IllegalArgumentException( "The writer is null" );
150         }
151         if ( indentSize < 0 )
152         {
153             indentSize = 0;
154         }
155 
156         PrettyPrintXMLWriter xmlWriter = new PrettyPrintXMLWriter( writer );
157         xmlWriter.setLineIndenter( StringUtils.repeat( " ", indentSize ) );
158         xmlWriter.setLineSeparator( lineSeparator );
159 
160         XmlPullParser parser = new MXParser();
161         try
162         {
163             parser.setInput( reader );
164 
165             prettyFormatInternal( parser, xmlWriter );
166         }
167         catch ( XmlPullParserException e )
168         {
169             throw new IOException( "Unable to parse the XML: " + e.getMessage() );
170         }
171     }
172 
173     /**
174      * Pretty format the input stream. For instance, the following input:
175      * 
176      * <pre>
177      * &lt;div&gt;&lt;b&gt;content&lt;/b&gt;&lt;/div&gt;
178      * </pre>
179      * 
180      * becomes
181      * 
182      * <pre>
183      * &lt;div&gt;
184      *   &lt;b&gt;content&lt;/b&gt;
185      * &lt;/div&gt;
186      * </pre>
187      *
188      * @param is not null
189      * @param os not null
190      * @throws IOException if any or invalid xml content
191      * @see #prettyFormat(InputStream, OutputStream, int, String)
192      */
193     public static void prettyFormat( InputStream is, OutputStream os )
194         throws IOException
195     {
196         prettyFormat( is, os, DEFAULT_INDENTATION_SIZE, DEFAULT_LINE_SEPARATOR );
197     }
198 
199     /**
200      * Pretty format the input stream. For instance, the following input:
201      * 
202      * <pre>
203      * &lt;div&gt;&lt;b&gt;content&lt;/b&gt;&lt;/div&gt;
204      * </pre>
205      * 
206      * becomes
207      * 
208      * <pre>
209      * &lt;div&gt;
210      *   &lt;b&gt;content&lt;/b&gt;
211      * &lt;/div&gt;
212      * </pre>
213      *
214      * @param is not null
215      * @param os not null
216      * @param indentSize positive number for the indentation
217      * @param lineSeparator the wanted line separator
218      * @throws IOException if any or invalid xml content
219      */
220     public static void prettyFormat( InputStream is, OutputStream os, int indentSize, String lineSeparator )
221         throws IOException
222     {
223         if ( is == null )
224         {
225             throw new IllegalArgumentException( "The is is null" );
226         }
227         if ( os == null )
228         {
229             throw new IllegalArgumentException( "The os is null" );
230         }
231         if ( indentSize < 0 )
232         {
233             indentSize = 0;
234         }
235 
236         Reader reader = null;
237         Writer writer = null;
238         try
239         {
240             reader = ReaderFactory.newXmlReader( is );
241             writer = new OutputStreamWriter( os );
242 
243             final PrettyPrintXMLWriter xmlWriter = new PrettyPrintXMLWriter( writer );
244             xmlWriter.setLineIndenter( StringUtils.repeat( " ", indentSize ) );
245             xmlWriter.setLineSeparator( lineSeparator );
246 
247             final XmlPullParser parser = new MXParser();
248             parser.setInput( reader );
249 
250             prettyFormatInternal( parser, xmlWriter );
251 
252             writer.close();
253             writer = null;
254 
255             reader.close();
256             reader = null;
257         }
258         catch ( XmlPullParserException e )
259         {
260             throw new IOException( "Unable to parse the XML: " + e.getMessage() );
261         }
262         finally
263         {
264             IOUtil.close( writer );
265             IOUtil.close( reader );
266         }
267     }
268 
269     /**
270      * @param parser not null
271      * @param writer not null
272      * @throws XmlPullParserException if any
273      * @throws IOException if any
274      */
275     private static void prettyFormatInternal( XmlPullParser parser, PrettyPrintXMLWriter writer )
276         throws XmlPullParserException, IOException
277     {
278         boolean hasTag = false;
279         boolean hasComment = false;
280         int eventType = parser.getEventType();
281         while ( eventType != XmlPullParser.END_DOCUMENT )
282         {
283             if ( eventType == XmlPullParser.START_TAG )
284             {
285                 hasTag = true;
286                 if ( hasComment )
287                 {
288                     writer.writeText( writer.getLineIndenter() );
289                     hasComment = false;
290                 }
291                 writer.startElement( parser.getName() );
292                 for ( int i = 0; i < parser.getAttributeCount(); i++ )
293                 {
294                     String key = parser.getAttributeName( i );
295                     String value = parser.getAttributeValue( i );
296                     writer.addAttribute( key, value );
297                 }
298             }
299             else if ( eventType == XmlPullParser.TEXT )
300             {
301                 String text = parser.getText();
302                 if ( !text.trim().equals( "" ) )
303                 {
304                     text = StringUtils.removeDuplicateWhitespace( text );
305                     writer.writeText( text );
306                 }
307             }
308             else if ( eventType == XmlPullParser.END_TAG )
309             {
310                 hasTag = false;
311                 writer.endElement();
312             }
313             else if ( eventType == XmlPullParser.COMMENT )
314             {
315                 hasComment = true;
316                 if ( !hasTag )
317                 {
318                     writer.writeMarkup( writer.getLineSeparator() );
319                     for ( int i = 0; i < writer.getDepth(); i++ )
320                     {
321                         writer.writeMarkup( writer.getLineIndenter() );
322                     }
323                 }
324                 writer.writeMarkup( "<!--" + parser.getText().trim() + " -->" );
325                 if ( !hasTag )
326                 {
327                     writer.writeMarkup( writer.getLineSeparator() );
328                     for ( int i = 0; i < writer.getDepth() - 1; i++ )
329                     {
330                         writer.writeMarkup( writer.getLineIndenter() );
331                     }
332                 }
333             }
334             else if ( eventType == XmlPullParser.DOCDECL )
335             {
336                 writer.writeMarkup( "<!DOCTYPE" + parser.getText() + ">" );
337                 writer.endOfLine();
338             }
339             else if ( eventType == XmlPullParser.PROCESSING_INSTRUCTION )
340             {
341                 writer.writeMarkup( "<?" + parser.getText() + "?>" );
342                 writer.endOfLine();
343             }
344             else if ( eventType == XmlPullParser.CDSECT )
345             {
346                 writer.writeMarkup( "<![CDATA[" + parser.getText() + "]]>" );
347             }
348             else if ( eventType == XmlPullParser.ENTITY_REF )
349             {
350                 writer.writeMarkup( "&" + parser.getName() + ";" );
351             }
352 
353             eventType = parser.nextToken();
354         }
355     }
356 }