View Javadoc
1   package org.codehaus.plexus.util.xml;
2   
3   /*
4    * Copyright The Codehaus Foundation.
5    *
6    * Licensed under the Apache License, Version 2.0 (the "License");
7    * you may not use this file except in compliance with the License.
8    * You may obtain a copy of the License at
9    *
10   *     http://www.apache.org/licenses/LICENSE-2.0
11   *
12   * Unless required by applicable law or agreed to in writing, software
13   * distributed under the License is distributed on an "AS IS" BASIS,
14   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15   * See the License for the specific language governing permissions and
16   * limitations under the License.
17   */
18  
19  import java.io.File;
20  import java.io.FileNotFoundException;
21  import java.io.FileOutputStream;
22  import java.io.IOException;
23  import java.io.OutputStream;
24  import java.io.OutputStreamWriter;
25  import java.io.StringWriter;
26  import java.io.Writer;
27  import java.util.Locale;
28  import java.util.regex.Matcher;
29  import java.util.regex.Pattern;
30  
31  /**
32   * Character stream that handles (or at least attempts to) all the necessary Voodo to figure out the charset encoding of
33   * the XML document written to the stream.
34   * 
35   * @author <a href="mailto:hboutemy@codehaus.org">Herve Boutemy</a>
36   * @version $Id$
37   * @since 1.4.4
38   */
39  public class XmlStreamWriter
40      extends Writer
41  {
42      private static final int BUFFER_SIZE = 4096;
43  
44      private StringWriter xmlPrologWriter = new StringWriter( BUFFER_SIZE );
45  
46      private OutputStream out;
47  
48      private Writer writer;
49  
50      private String encoding;
51  
52      public XmlStreamWriter( OutputStream out )
53      {
54          this.out = out;
55      }
56  
57      public XmlStreamWriter( File file )
58          throws FileNotFoundException
59      {
60          this( new FileOutputStream( file ) );
61      }
62  
63      public String getEncoding()
64      {
65          return encoding;
66      }
67  
68      public void close()
69          throws IOException
70      {
71          if ( writer == null )
72          {
73              encoding = "UTF-8";
74              writer = new OutputStreamWriter( out, encoding );
75              writer.write( xmlPrologWriter.toString() );
76          }
77          writer.close();
78      }
79  
80      public void flush()
81          throws IOException
82      {
83          if ( writer != null )
84          {
85              writer.flush();
86          }
87      }
88  
89      private void detectEncoding( char[] cbuf, int off, int len )
90          throws IOException
91      {
92          int size = len;
93          StringBuffer xmlProlog = xmlPrologWriter.getBuffer();
94          if ( xmlProlog.length() + len > BUFFER_SIZE )
95          {
96              size = BUFFER_SIZE - xmlProlog.length();
97          }
98          xmlPrologWriter.write( cbuf, off, size );
99  
100         // try to determine encoding
101         if ( xmlProlog.length() >= 5 )
102         {
103             if ( xmlProlog.substring( 0, 5 ).equals( "<?xml" ) )
104             {
105                 // try to extract encoding from XML prolog
106                 int xmlPrologEnd = xmlProlog.indexOf( "?>" );
107                 if ( xmlPrologEnd > 0 )
108                 {
109                     // ok, full XML prolog written: let's extract encoding
110                     Matcher m = ENCODING_PATTERN.matcher( xmlProlog.substring( 0, xmlPrologEnd ) );
111                     if ( m.find() )
112                     {
113                         encoding = m.group( 1 ).toUpperCase( Locale.ENGLISH );
114                         encoding = encoding.substring( 1, encoding.length() - 1 );
115                     }
116                     else
117                     {
118                         // no encoding found in XML prolog: using default encoding
119                         encoding = "UTF-8";
120                     }
121                 }
122                 else
123                 {
124                     if ( xmlProlog.length() >= BUFFER_SIZE )
125                     {
126                         // no encoding found in first characters: using default encoding
127                         encoding = "UTF-8";
128                     }
129                 }
130             }
131             else
132             {
133                 // no XML prolog: using default encoding
134                 encoding = "UTF-8";
135             }
136             if ( encoding != null )
137             {
138                 // encoding has been chosen: let's do it
139                 xmlPrologWriter = null;
140                 writer = new OutputStreamWriter( out, encoding );
141                 writer.write( xmlProlog.toString() );
142                 if ( len > size )
143                 {
144                     writer.write( cbuf, off + size, len - size );
145                 }
146             }
147         }
148     }
149 
150     public void write( char[] cbuf, int off, int len )
151         throws IOException
152     {
153         if ( xmlPrologWriter != null )
154         {
155             detectEncoding( cbuf, off, len );
156         }
157         else
158         {
159             writer.write( cbuf, off, len );
160         }
161     }
162 
163     static final Pattern ENCODING_PATTERN = XmlReader.ENCODING_PATTERN;
164 }