View Javadoc
1   package org.codehaus.plexus.util.xml;
2   
3   /*
4    * Copyright The Codehaus Foundation.
5    *
6    * Licensed under the Apache License, Version 2.0 (the "License");
7    * you may not use this file except in compliance with the License.
8    * You may obtain a copy of the License at
9    *
10   *     http://www.apache.org/licenses/LICENSE-2.0
11   *
12   * Unless required by applicable law or agreed to in writing, software
13   * distributed under the License is distributed on an "AS IS" BASIS,
14   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15   * See the License for the specific language governing permissions and
16   * limitations under the License.
17   */
18  
19  import java.io.ByteArrayInputStream;
20  import java.io.IOException;
21  import java.io.InputStream;
22  import java.io.SequenceInputStream;
23  
24  import org.codehaus.plexus.util.IOUtil;
25  
26  import junit.framework.ComparisonFailure;
27  import junit.framework.TestCase;
28  
29  public class XmlStreamReaderTest
30      extends TestCase
31  {
32      /** french */
33      private static final String TEXT_LATIN1 = "eacute: \u00E9";
34  
35      /** greek */
36      private static final String TEXT_LATIN7 = "alpha: \u03B1";
37  
38      /** euro support */
39      private static final String TEXT_LATIN15 = "euro: \u20AC";
40  
41      /** japanese */
42      private static final String TEXT_EUC_JP = "hiragana A: \u3042";
43  
44      /** Unicode: support everything */
45      private static final String TEXT_UNICODE =
46          TEXT_LATIN1 + ", " + TEXT_LATIN7 + ", " + TEXT_LATIN15 + ", " + TEXT_EUC_JP;
47  
48      /** see http://unicode.org/faq/utf_bom.html#BOM */
49      private static final byte[] BOM_UTF8 = { (byte) 0xEF, (byte) 0xBB, (byte) 0xBF };
50  
51      private static final byte[] BOM_UTF16BE = { (byte) 0xFE, (byte) 0xFF };
52  
53      private static final byte[] BOM_UTF16LE = { (byte) 0xFF, (byte) 0xFE };
54  
55      private static final byte[] BOM_UTF32BE = { (byte) 0x00, (byte) 0x00, (byte) 0xFF, (byte) 0xFE };
56  
57      private static final byte[] BOM_UTF32LE = { (byte) 0xFF, (byte) 0xFE, (byte) 0x00, (byte) 0x00 };
58  
59      private static String createXmlContent( String text, String encoding )
60      {
61          String xmlDecl = "<?xml version=\"1.0\"?>";
62          if ( encoding != null )
63          {
64              xmlDecl = "<?xml version=\"1.0\" encoding=\"" + encoding + "\"?>";
65          }
66          String xml = xmlDecl + "\n<text>" + text + "</text>";
67          return xml;
68      }
69  
70      private static void checkXmlContent( String xml, String encoding )
71          throws IOException
72      {
73          checkXmlContent( xml, encoding, null );
74      }
75  
76      private static void checkXmlContent( String xml, String encoding, byte... bom )
77          throws IOException
78      {
79          byte[] xmlContent = xml.getBytes( encoding );
80          InputStream in = new ByteArrayInputStream( xmlContent );
81  
82          if ( bom != null )
83          {
84              in = new SequenceInputStream( new ByteArrayInputStream( bom ), in );
85          }
86  
87          XmlStreamReader reader = new XmlStreamReader( in );
88          assertEquals( encoding, reader.getEncoding() );
89          String result = IOUtil.toString( reader );
90          assertEquals( xml, result );
91      }
92  
93      private static void checkXmlStreamReader( String text, String encoding, String effectiveEncoding )
94          throws IOException
95      {
96          checkXmlStreamReader( text, encoding, effectiveEncoding, null );
97      }
98  
99      private static void checkXmlStreamReader( String text, String encoding )
100         throws IOException
101     {
102         checkXmlStreamReader( text, encoding, encoding, null );
103     }
104 
105     private static void checkXmlStreamReader( String text, String encoding, byte... bom )
106         throws IOException
107     {
108         checkXmlStreamReader( text, encoding, encoding, bom );
109     }
110 
111     private static void checkXmlStreamReader( String text, String encoding, String effectiveEncoding, byte... bom )
112         throws IOException
113     {
114         String xml = createXmlContent( text, encoding );
115         checkXmlContent( xml, effectiveEncoding, bom );
116     }
117 
118     public void testNoXmlHeader()
119         throws IOException
120     {
121         String xml = "<text>text with no XML header</text>";
122         checkXmlContent( xml, "UTF-8" );
123         checkXmlContent( xml, "UTF-8", BOM_UTF8 );
124     }
125 
126     public void testDefaultEncoding()
127         throws IOException
128     {
129         checkXmlStreamReader( TEXT_UNICODE, null, "UTF-8" );
130         checkXmlStreamReader( TEXT_UNICODE, null, "UTF-8", BOM_UTF8 );
131     }
132 
133     public void testUTF8Encoding()
134         throws IOException
135     {
136         checkXmlStreamReader( TEXT_UNICODE, "UTF-8" );
137         checkXmlStreamReader( TEXT_UNICODE, "UTF-8", BOM_UTF8 );
138     }
139 
140     public void testUTF16Encoding()
141         throws IOException
142     {
143         checkXmlStreamReader( TEXT_UNICODE, "UTF-16", "UTF-16BE", null );
144         checkXmlStreamReader( TEXT_UNICODE, "UTF-16", "UTF-16LE", BOM_UTF16LE );
145         checkXmlStreamReader( TEXT_UNICODE, "UTF-16", "UTF-16BE", BOM_UTF16BE );
146     }
147 
148     public void testUTF16BEEncoding()
149         throws IOException
150     {
151         checkXmlStreamReader( TEXT_UNICODE, "UTF-16BE" );
152     }
153 
154     public void testUTF16LEEncoding()
155         throws IOException
156     {
157         checkXmlStreamReader( TEXT_UNICODE, "UTF-16LE" );
158     }
159 
160     public void testLatin1Encoding()
161         throws IOException
162     {
163         checkXmlStreamReader( TEXT_LATIN1, "ISO-8859-1" );
164     }
165 
166     public void testLatin7Encoding()
167         throws IOException
168     {
169         checkXmlStreamReader( TEXT_LATIN7, "ISO-8859-7" );
170     }
171 
172     public void testLatin15Encoding()
173         throws IOException
174     {
175         checkXmlStreamReader( TEXT_LATIN15, "ISO-8859-15" );
176     }
177 
178     public void testEUC_JPEncoding()
179         throws IOException
180     {
181         checkXmlStreamReader( TEXT_EUC_JP, "EUC-JP" );
182     }
183 
184     public void testEBCDICEncoding()
185         throws IOException
186     {
187         checkXmlStreamReader( "simple text in EBCDIC", "CP1047" );
188     }
189 
190     public void testInappropriateEncoding()
191         throws IOException
192     {
193         try
194         {
195             checkXmlStreamReader( TEXT_UNICODE, "ISO-8859-2" );
196             fail( "Check should have failed, since some characters are not available in the specified encoding" );
197         }
198         catch ( ComparisonFailure cf )
199         {
200             // expected failure, since the encoding does not contain some characters
201         }
202     }
203 
204     public void testEncodingAttribute()
205         throws IOException
206     {
207         String xml = "<?xml version='1.0' encoding='US-ASCII'?><element encoding='attribute value'/>";
208         checkXmlContent( xml, "US-ASCII" );
209 
210         xml = "<?xml version='1.0' encoding  =  'US-ASCII'  ?><element encoding='attribute value'/>";
211         checkXmlContent( xml, "US-ASCII" );
212 
213         xml = "<?xml version='1.0'?><element encoding='attribute value'/>";
214         checkXmlContent( xml, "UTF-8" );
215 
216         xml = "<?xml\nversion='1.0'\nencoding\n=\n'US-ASCII'\n?>\n<element encoding='attribute value'/>";
217         checkXmlContent( xml, "US-ASCII" );
218 
219         xml = "<?xml\nversion='1.0'\n?>\n<element encoding='attribute value'/>";
220         checkXmlContent( xml, "UTF-8" );
221 
222         xml = "<element encoding='attribute value'/>";
223         checkXmlContent( xml, "UTF-8" );
224     }
225 }