View Javadoc
1   package org.codehaus.plexus.util.xml;
2   
3   /*
4    * Copyright The Codehaus Foundation.
5    *
6    * Licensed under the Apache License, Version 2.0 (the "License");
7    * you may not use this file except in compliance with the License.
8    * You may obtain a copy of the License at
9    *
10   *     http://www.apache.org/licenses/LICENSE-2.0
11   *
12   * Unless required by applicable law or agreed to in writing, software
13   * distributed under the License is distributed on an "AS IS" BASIS,
14   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15   * See the License for the specific language governing permissions and
16   * limitations under the License.
17   */
18  
19  import java.io.File;
20  import java.io.IOException;
21  import java.io.OutputStream;
22  import java.io.OutputStreamWriter;
23  import java.io.StringWriter;
24  import java.io.Writer;
25  import java.nio.file.Files;
26  import java.util.Locale;
27  import java.util.regex.Matcher;
28  import java.util.regex.Pattern;
29  
30  /**
31   * Character stream that handles (or at least attempts to) all the necessary Voodo to figure out the charset encoding of
32   * the XML document written to the stream.
33   *
34   * @author <a href="mailto:hboutemy@codehaus.org">Herve Boutemy</a>
35   *
36   * @since 1.4.4
37   */
38  public class XmlStreamWriter extends Writer {
39      private static final int BUFFER_SIZE = 4096;
40  
41      private StringWriter xmlPrologWriter = new StringWriter(BUFFER_SIZE);
42  
43      private OutputStream out;
44  
45      private Writer writer;
46  
47      private String encoding;
48  
49      public XmlStreamWriter(OutputStream out) {
50          this.out = out;
51      }
52  
53      public XmlStreamWriter(File file) throws IOException {
54          this(Files.newOutputStream(file.toPath()));
55      }
56  
57      public String getEncoding() {
58          return encoding;
59      }
60  
61      @Override
62      public void close() throws IOException {
63          if (writer == null) {
64              encoding = "UTF-8";
65              writer = new OutputStreamWriter(out, encoding);
66              writer.write(xmlPrologWriter.toString());
67          }
68          writer.close();
69      }
70  
71      @Override
72      public void flush() throws IOException {
73          if (writer != null) {
74              writer.flush();
75          }
76      }
77  
78      private void detectEncoding(char[] cbuf, int off, int len) throws IOException {
79          int size = len;
80          StringBuffer xmlProlog = xmlPrologWriter.getBuffer();
81          if (xmlProlog.length() + len > BUFFER_SIZE) {
82              size = BUFFER_SIZE - xmlProlog.length();
83          }
84          xmlPrologWriter.write(cbuf, off, size);
85  
86          // try to determine encoding
87          if (xmlProlog.length() >= 5) {
88              if (xmlProlog.substring(0, 5).equals("<?xml")) {
89                  // try to extract encoding from XML prolog
90                  int xmlPrologEnd = xmlProlog.indexOf("?>");
91                  if (xmlPrologEnd > 0) {
92                      // ok, full XML prolog written: let's extract encoding
93                      Matcher m = ENCODING_PATTERN.matcher(xmlProlog.substring(0, xmlPrologEnd));
94                      if (m.find()) {
95                          encoding = m.group(1).toUpperCase(Locale.ENGLISH);
96                          encoding = encoding.substring(1, encoding.length() - 1);
97                      } else {
98                          // no encoding found in XML prolog: using default encoding
99                          encoding = "UTF-8";
100                     }
101                 } else {
102                     if (xmlProlog.length() >= BUFFER_SIZE) {
103                         // no encoding found in first characters: using default encoding
104                         encoding = "UTF-8";
105                     }
106                 }
107             } else {
108                 // no XML prolog: using default encoding
109                 encoding = "UTF-8";
110             }
111             if (encoding != null) {
112                 // encoding has been chosen: let's do it
113                 xmlPrologWriter = null;
114                 writer = new OutputStreamWriter(out, encoding);
115                 writer.write(xmlProlog.toString());
116                 if (len > size) {
117                     writer.write(cbuf, off + size, len - size);
118                 }
119             }
120         }
121     }
122 
123     @Override
124     public void write(char[] cbuf, int off, int len) throws IOException {
125         if (xmlPrologWriter != null) {
126             detectEncoding(cbuf, off, len);
127         } else {
128             writer.write(cbuf, off, len);
129         }
130     }
131 
132     static final Pattern ENCODING_PATTERN = XmlReader.ENCODING_PATTERN;
133 }