View Javadoc
1   package org.codehaus.plexus.util.xml;
2   
3   /*
4    * Copyright The Codehaus Foundation.
5    *
6    * Licensed under the Apache License, Version 2.0 (the "License");
7    * you may not use this file except in compliance with the License.
8    * You may obtain a copy of the License at
9    *
10   *     http://www.apache.org/licenses/LICENSE-2.0
11   *
12   * Unless required by applicable law or agreed to in writing, software
13   * distributed under the License is distributed on an "AS IS" BASIS,
14   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15   * See the License for the specific language governing permissions and
16   * limitations under the License.
17   */
18  
19  import java.io.PrintWriter;
20  import java.io.Writer;
21  import java.util.LinkedList;
22  import java.util.regex.Matcher;
23  import java.util.regex.Pattern;
24  
25  import org.codehaus.plexus.util.StringUtils;
26  
27  /**
28   * Implementation of XMLWriter which emits nicely formatted documents.
29   *
30   * @version $Id$
31   */
32  public class PrettyPrintXMLWriter
33      implements XMLWriter
34  {
35      /** Line separator ("\n" on UNIX) */
36      protected static final String LS = System.getProperty( "line.separator" );
37  
38      private PrintWriter writer;
39  
40      private LinkedList<String> elementStack = new LinkedList<String>();
41  
42      private boolean tagInProgress;
43  
44      private int depth;
45  
46      private String lineIndenter;
47  
48      private String lineSeparator;
49  
50      private String encoding;
51  
52      private String docType;
53  
54      private boolean readyForNewLine;
55  
56      private boolean tagIsEmpty;
57  
58      /**
59       * @param writer not null
60       * @param lineIndenter could be null, but the normal way is some spaces.
61       */
62      public PrettyPrintXMLWriter( PrintWriter writer, String lineIndenter )
63      {
64          this( writer, lineIndenter, null, null );
65      }
66  
67      /**
68       * @param writer not null
69       * @param lineIndenter could be null, but the normal way is some spaces.
70       */
71      public PrettyPrintXMLWriter( Writer writer, String lineIndenter )
72      {
73          this( new PrintWriter( writer ), lineIndenter );
74      }
75  
76      /**
77       * @param writer not null
78       */
79      public PrettyPrintXMLWriter( PrintWriter writer )
80      {
81          this( writer, null, null );
82      }
83  
84      /**
85       * @param writer not null
86       */
87      public PrettyPrintXMLWriter( Writer writer )
88      {
89          this( new PrintWriter( writer ) );
90      }
91  
92      /**
93       * @param writer not null
94       * @param lineIndenter could be null, but the normal way is some spaces.
95       * @param encoding could be null or invalid.
96       * @param doctype could be null.
97       */
98      public PrettyPrintXMLWriter( PrintWriter writer, String lineIndenter, String encoding, String doctype )
99      {
100         this( writer, lineIndenter, LS, encoding, doctype );
101     }
102 
103     /**
104      * @param writer not null
105      * @param lineIndenter could be null, but the normal way is some spaces.
106      * @param encoding could be null or invalid.
107      * @param doctype could be null.
108      */
109     public PrettyPrintXMLWriter( Writer writer, String lineIndenter, String encoding, String doctype )
110     {
111         this( new PrintWriter( writer ), lineIndenter, encoding, doctype );
112     }
113 
114     /**
115      * @param writer not null
116      * @param encoding could be null or invalid.
117      * @param doctype could be null.
118      */
119     public PrettyPrintXMLWriter( PrintWriter writer, String encoding, String doctype )
120     {
121         this( writer, "  ", encoding, doctype );
122     }
123 
124     /**
125      * @param writer not null
126      * @param encoding could be null or invalid.
127      * @param doctype could be null.
128      */
129     public PrettyPrintXMLWriter( Writer writer, String encoding, String doctype )
130     {
131         this( new PrintWriter( writer ), encoding, doctype );
132     }
133 
134     /**
135      * @param writer not null
136      * @param lineIndenter could be null, but the normal way is some spaces.
137      * @param lineSeparator could be null, but the normal way is valid line separator ("\n" on UNIX).
138      * @param encoding could be null or invalid.
139      * @param doctype could be null.
140      */
141     public PrettyPrintXMLWriter( PrintWriter writer, String lineIndenter, String lineSeparator, String encoding,
142                                  String doctype )
143     {
144         setWriter( writer );
145 
146         setLineIndenter( lineIndenter );
147 
148         setLineSeparator( lineSeparator );
149 
150         setEncoding( encoding );
151 
152         setDocType( doctype );
153 
154         if ( doctype != null || encoding != null )
155         {
156             writeDocumentHeaders();
157         }
158     }
159 
160     /** {@inheritDoc} */
161     public void startElement( String name )
162     {
163         tagIsEmpty = false;
164 
165         finishTag();
166 
167         write( "<" );
168 
169         write( name );
170 
171         elementStack.addLast( name );
172 
173         tagInProgress = true;
174 
175         setDepth( getDepth() + 1 );
176 
177         readyForNewLine = true;
178 
179         tagIsEmpty = true;
180     }
181 
182     /** {@inheritDoc} */
183     public void writeText( String text )
184     {
185         writeText( text, true );
186     }
187 
188     /** {@inheritDoc} */
189     public void writeMarkup( String text )
190     {
191         writeText( text, false );
192     }
193 
194     private void writeText( String text, boolean escapeXml )
195     {
196         readyForNewLine = false;
197 
198         tagIsEmpty = false;
199 
200         finishTag();
201 
202         if ( escapeXml )
203         {
204             text = escapeXml( text );
205         }
206 
207         write( StringUtils.unifyLineSeparators( text, lineSeparator ) );
208     }
209 
210     private static final Pattern amp = Pattern.compile( "&" );
211 
212     private static final Pattern lt = Pattern.compile( "<" );
213 
214     private static final Pattern gt = Pattern.compile( ">" );
215 
216     private static final Pattern dqoute = Pattern.compile( "\"" );
217 
218     private static final Pattern sqoute = Pattern.compile( "\'" );
219 
220     private static String escapeXml( String text )
221     {
222         if ( text.indexOf( '&' ) >= 0 )
223         {
224             text = amp.matcher( text ).replaceAll( "&amp;" );
225         }
226         if ( text.indexOf( '<' ) >= 0 )
227         {
228             text = lt.matcher( text ).replaceAll( "&lt;" );
229         }
230         if ( text.indexOf( '>' ) >= 0 )
231         {
232             text = gt.matcher( text ).replaceAll( "&gt;" );
233         }
234         if ( text.indexOf( '"' ) >= 0 )
235         {
236             text = dqoute.matcher( text ).replaceAll( "&quot;" );
237         }
238         if ( text.indexOf( '\'' ) >= 0 )
239         {
240             text = sqoute.matcher( text ).replaceAll( "&apos;" );
241         }
242 
243         return text;
244     }
245 
246     private static final String crlf_str = "\r\n";
247 
248     private static final Pattern crlf = Pattern.compile( crlf_str );
249 
250     private static final Pattern lowers = Pattern.compile( "([\000-\037])" );
251 
252     private static String escapeXmlAttribute( String text )
253     {
254         text = escapeXml( text );
255 
256         // Windows
257         Matcher crlfmatcher = crlf.matcher( text );
258         if ( text.contains( crlf_str ) )
259         {
260             text = crlfmatcher.replaceAll( "&#10;" );
261         }
262 
263         Matcher m = lowers.matcher( text );
264         StringBuffer b = new StringBuffer();
265         while ( m.find() )
266         {
267             m = m.appendReplacement( b, "&#" + Integer.toString( m.group( 1 ).charAt( 0 ) ) + ";" );
268         }
269         m.appendTail( b );
270 
271         return b.toString();
272     }
273 
274     /** {@inheritDoc} */
275     public void addAttribute( String key, String value )
276     {
277         write( " " );
278 
279         write( key );
280 
281         write( "=\"" );
282 
283         write( escapeXmlAttribute( value ) );
284 
285         write( "\"" );
286     }
287 
288     /** {@inheritDoc} */
289     public void endElement()
290     {
291         setDepth( getDepth() - 1 );
292 
293         if ( tagIsEmpty )
294         {
295             write( "/" );
296 
297             readyForNewLine = false;
298 
299             finishTag();
300 
301             elementStack.removeLast();
302         }
303         else
304         {
305             finishTag();
306 
307             write( "</" + elementStack.removeLast() + ">" );
308         }
309 
310         readyForNewLine = true;
311     }
312 
313     /**
314      * Write a string to the underlying writer
315      * 
316      * @param str
317      */
318     private void write( String str )
319     {
320         getWriter().write( str );
321     }
322 
323     private void finishTag()
324     {
325         if ( tagInProgress )
326         {
327             write( ">" );
328         }
329 
330         tagInProgress = false;
331 
332         if ( readyForNewLine )
333         {
334             endOfLine();
335         }
336         readyForNewLine = false;
337 
338         tagIsEmpty = false;
339     }
340 
341     /**
342      * Get the string used as line indenter
343      *
344      * @return the line indenter
345      */
346     protected String getLineIndenter()
347     {
348         return lineIndenter;
349     }
350 
351     /**
352      * Set the string used as line indenter
353      *
354      * @param lineIndenter new line indenter, could be null, but the normal way is some spaces.
355      */
356     protected void setLineIndenter( String lineIndenter )
357     {
358         this.lineIndenter = lineIndenter;
359     }
360 
361     /**
362      * Get the string used as line separator or LS if not set.
363      *
364      * @return the line separator
365      * @see #LS
366      */
367     protected String getLineSeparator()
368     {
369         return lineSeparator;
370     }
371 
372     /**
373      * Set the string used as line separator
374      *
375      * @param lineSeparator new line separator, could be null but the normal way is valid line separator ("\n" on UNIX).
376      */
377     protected void setLineSeparator( String lineSeparator )
378     {
379         this.lineSeparator = lineSeparator;
380     }
381 
382     /**
383      * Write the end of line character (using specified line separator) and start new line with indentation
384      *
385      * @see #getLineIndenter()
386      * @see #getLineSeparator()
387      */
388     protected void endOfLine()
389     {
390         write( getLineSeparator() );
391 
392         for ( int i = 0; i < getDepth(); i++ )
393         {
394             write( getLineIndenter() );
395         }
396     }
397 
398     private void writeDocumentHeaders()
399     {
400         write( "<?xml version=\"1.0\"" );
401 
402         if ( getEncoding() != null )
403         {
404             write( " encoding=\"" + getEncoding() + "\"" );
405         }
406 
407         write( "?>" );
408 
409         endOfLine();
410 
411         if ( getDocType() != null )
412         {
413             write( "<!DOCTYPE " );
414 
415             write( getDocType() );
416 
417             write( ">" );
418 
419             endOfLine();
420         }
421     }
422 
423     /**
424      * Set the underlying writer
425      *
426      * @param writer not null writer
427      */
428     protected void setWriter( PrintWriter writer )
429     {
430         if ( writer == null )
431         {
432             throw new IllegalArgumentException( "writer could not be null" );
433         }
434 
435         this.writer = writer;
436     }
437 
438     /**
439      * Get the underlying writer
440      *
441      * @return the underlying writer
442      */
443     protected PrintWriter getWriter()
444     {
445         return writer;
446     }
447 
448     /**
449      * Set the depth in the xml indentation
450      *
451      * @param depth new depth
452      */
453     protected void setDepth( int depth )
454     {
455         this.depth = depth;
456     }
457 
458     /**
459      * Get the current depth in the xml indentation
460      *
461      * @return the current depth
462      */
463     protected int getDepth()
464     {
465         return depth;
466     }
467 
468     /**
469      * Set the encoding in the xml
470      *
471      * @param encoding new encoding
472      */
473     protected void setEncoding( String encoding )
474     {
475         this.encoding = encoding;
476     }
477 
478     /**
479      * Get the current encoding in the xml
480      *
481      * @return the current encoding
482      */
483     protected String getEncoding()
484     {
485         return encoding;
486     }
487 
488     /**
489      * Set the docType in the xml
490      *
491      * @param docType new docType
492      */
493     protected void setDocType( String docType )
494     {
495         this.docType = docType;
496     }
497 
498     /**
499      * Get the docType in the xml
500      *
501      * @return the current docType
502      */
503     protected String getDocType()
504     {
505         return docType;
506     }
507 
508     /**
509      * @return the current elementStack;
510      */
511     protected LinkedList<String> getElementStack()
512     {
513         return elementStack;
514     }
515 }