Coverage Report - org.codehaus.plexus.util.xml.pull.XmlPullParser
 
Classes in this File Line Coverage Branch Coverage Complexity
XmlPullParser
N/A
N/A
1
 
 1  
 /* -*-             c-basic-offset: 4; indent-tabs-mode: nil; -*-  //------100-columns-wide------>|*/
 2  
 // for license please see accompanying LICENSE.txt file (available also at http://www.xmlpull.org/)
 3  
 
 4  
 package org.codehaus.plexus.util.xml.pull;
 5  
 
 6  
 import java.io.InputStream;
 7  
 import java.io.IOException;
 8  
 import java.io.Reader;
 9  
 
 10  
 /**
 11  
  * XML Pull Parser is an interface that defines parsing functionality provided in
 12  
  * <a href="http://www.xmlpull.org/">XMLPULL V1 API</a> (visit this website to learn more about API and its
 13  
  * implementations).
 14  
  * <p>
 15  
  * There are the following different kinds of parser, depending on which features are set:
 16  
  * <ul>
 17  
  * <li><b>non-validating</b> parser as defined in XML 1.0 spec when FEATURE_PROCESS_DOCDECL is set to true
 18  
  * <li><b>validating parser</b> as defined in XML 1.0 spec when FEATURE_VALIDATION is true (and that implies that
 19  
  * FEATURE_PROCESS_DOCDECL is true)
 20  
  * <li>when FEATURE_PROCESS_DOCDECL is false (this is the default; if a different value is required it must be
 21  
  * changed before parsing is started) then parser behaves like XML 1.0 compliant non-validating parser under condition
 22  
  * that <em>no DOCDECL is present</em> in XML documents (internal entities can still be defined with
 23  
  * defineEntityReplacementText()). This mode of operation is intended <b>for operation in constrained environments</b>
 24  
  * such as J2ME.
 25  
  * </ul>
 26  
  * <p>
 27  
  * There are two key methods: next() and nextToken(). While next() provides access to high level parsing events,
 28  
  * nextToken() allows access to lower level tokens.
 29  
  * <p>
 30  
  * The current event state of the parser can be determined by calling the <a href="#getEventType()">getEventType()</a>
 31  
  * method. Initially, the parser is in the <a href="#START_DOCUMENT">START_DOCUMENT</a> state.
 32  
  * <p>
 33  
  * The method <a href="#next()">next()</a> advances the parser to the next event. The int value returned from next
 34  
  * determines the current parser state and is identical to the value returned from subsequent calls to getEventType().
 35  
  * <p>
 36  
  * The following event types are seen by next()
 37  
  * <dl>
 38  
  * <dt><a href="#START_TAG">START_TAG</a>
 39  
  * <dd>An XML start tag was read.
 40  
  * <dt><a href="#TEXT">TEXT</a>
 41  
  * <dd>Text content was read; the text content can be retrieved using the getText() method. (when in validating mode
 42  
  * next() will not report ignorable whitespaces, use nextToken() instead)
 43  
  * <dt><a href="#END_TAG">END_TAG</a>
 44  
  * <dd>An end tag was read
 45  
  * <dt><a href="#END_DOCUMENT">END_DOCUMENT</a>
 46  
  * <dd>No more events are available
 47  
  * </dl>
 48  
  * <p>
 49  
  * After the first next() or nextToken() (or any other next*() method) is called, the user application can obtain XML version,
 50  
  * standalone and encoding from XML declaration in following ways:
 51  
  * <ul>
 52  
  * <li><b>version</b>: getProperty(&quot;<a href=
 53  
  * "http://xmlpull.org/v1/doc/properties.html#xmldecl-version">http://xmlpull.org/v1/doc/properties.html#xmldecl-version</a>&quot;)
 54  
  * returns String ("1.0") or null if XMLDecl was not read or if property is not supported
 55  
  * <li><b>standalone</b>: getProperty(&quot;<a href=
 56  
  * "http://xmlpull.org/v1/doc/features.html#xmldecl-standalone">http://xmlpull.org/v1/doc/features.html#xmldecl-standalone</a>&quot;)
 57  
  * returns Boolean: null if there was no standalone declaration or if property is not supported otherwise returns
 58  
  * Boolean(true) if standalone="yes" and Boolean(false) when standalone="no"
 59  
  * <li><b>encoding</b>: obtained from getInputEncoding() null if stream had unknown encoding (not set in setInputStream)
 60  
  * and it was not declared in XMLDecl
 61  
  * </ul>
 62  
  * A minimal example for using this API may look as follows:
 63  
  * 
 64  
  * <pre>
 65  
  * import java.io.IOException;
 66  
  * import java.io.StringReader;
 67  
  *
 68  
  * import org.xmlpull.v1.XmlPullParser;
 69  
  * import org.xmlpull.v1.<a href="XmlPullParserException.html">XmlPullParserException</a>;
 70  
  * import org.xmlpull.v1.<a href="XmlPullParserFactory.html">XmlPullParserFactory</a>;
 71  
  *
 72  
  * public class SimpleXmlPullApp
 73  
  * {
 74  
  *
 75  
  *     public static void main (String args[])
 76  
  *         throws XmlPullParserException, IOException
 77  
  *     {
 78  
  *         XmlPullParserFactory factory = XmlPullParserFactory.newInstance();
 79  
  *         factory.setNamespaceAware(true);
 80  
  *         XmlPullParser xpp = factory.newPullParser();
 81  
  *
 82  
  *         xpp.<a href="#setInput">setInput</a>( new StringReader ( "&lt;foo>Hello World!&lt;/foo>" ) );
 83  
  *         int eventType = xpp.getEventType();
 84  
  *         while (eventType != xpp.END_DOCUMENT) {
 85  
  *          if(eventType == xpp.START_DOCUMENT) {
 86  
  *              System.out.println("Start document");
 87  
  *          } else if(eventType == xpp.END_DOCUMENT) {
 88  
  *              System.out.println("End document");
 89  
  *          } else if(eventType == xpp.START_TAG) {
 90  
  *              System.out.println("Start tag "+xpp.<a href="#getName()">getName()</a>);
 91  
  *          } else if(eventType == xpp.END_TAG) {
 92  
  *              System.out.println("End tag "+xpp.getName());
 93  
  *          } else if(eventType == xpp.TEXT) {
 94  
  *              System.out.println("Text "+xpp.<a href="#getText()">getText()</a>);
 95  
  *          }
 96  
  *          eventType = xpp.next();
 97  
  *         }
 98  
  *     }
 99  
  * }
 100  
  * </pre>
 101  
  * <p>
 102  
  * The above example will generate the following output:
 103  
  * 
 104  
  * <pre>
 105  
  * Start document
 106  
  * Start tag foo
 107  
  * Text Hello World!
 108  
  * End tag foo
 109  
  * </pre>
 110  
  * <p>
 111  
  * For more details on API usage, please refer to the quick Introduction available at
 112  
  * <a href="http://www.xmlpull.org">http://www.xmlpull.org</a>
 113  
  *
 114  
  * @see #defineEntityReplacementText
 115  
  * @see #getName
 116  
  * @see #getNamespace
 117  
  * @see #getText
 118  
  * @see #next
 119  
  * @see #nextToken
 120  
  * @see #setInput
 121  
  * @see #FEATURE_PROCESS_DOCDECL
 122  
  * @see #FEATURE_VALIDATION
 123  
  * @see #START_DOCUMENT
 124  
  * @see #START_TAG
 125  
  * @see #TEXT
 126  
  * @see #END_TAG
 127  
  * @see #END_DOCUMENT
 128  
  * @author <a href="http://www-ai.cs.uni-dortmund.de/PERSONAL/haustein.html">Stefan Haustein</a>
 129  
  * @author <a href="http://www.extreme.indiana.edu/~aslom/">Aleksander Slominski</a>
 130  
  */
 131  
 
 132  
 public interface XmlPullParser
 133  
 {
 134  
 
 135  
     /** This constant represents the default namespace (empty string "") */
 136  
     String NO_NAMESPACE = "";
 137  
 
 138  
     // ----------------------------------------------------------------------------
 139  
     // EVENT TYPES as reported by next()
 140  
 
 141  
     /**
 142  
      * Signals that the parser is at the very beginning of the document and nothing was read yet. This event type can only
 143  
      * be observed by calling getEventType() before the first call to next(), nextToken(), or nextTag().
 144  
      *
 145  
      * @see #next
 146  
      * @see #nextToken
 147  
      */
 148  
     int START_DOCUMENT = 0;
 149  
 
 150  
     /**
 151  
      * Logical end of the xml document. Returned from getEventType, next() and nextToken() when the end of the input
 152  
      * document has been reached.
 153  
      * <p>
 154  
      * <strong>NOTE:</strong> calling again <a href="#next()">next()</a> or <a href="#nextToken()">nextToken()</a> will
 155  
      * result in exception being thrown.
 156  
      *
 157  
      * @see #next
 158  
      * @see #nextToken
 159  
      */
 160  
     int END_DOCUMENT = 1;
 161  
 
 162  
     /**
 163  
      * Returned from getEventType(), <a href="#next()">next()</a>, <a href="#nextToken()">nextToken()</a> when a start
 164  
      * tag was read. The name of start tag is available from getName(), its namespace and prefix are available from
 165  
      * getNamespace() and getPrefix() if <a href='#FEATURE_PROCESS_NAMESPACES'>namespaces are enabled</a>. See
 166  
      * getAttribute* methods to retrieve element attributes. See getNamespace* methods to retrieve newly declared
 167  
      * namespaces.
 168  
      *
 169  
      * @see #next
 170  
      * @see #nextToken
 171  
      * @see #getName
 172  
      * @see #getPrefix
 173  
      * @see #getNamespace
 174  
      * @see #getAttributeCount
 175  
      * @see #getDepth
 176  
      * @see #getNamespaceCount
 177  
      * @see #getNamespace
 178  
      * @see #FEATURE_PROCESS_NAMESPACES
 179  
      */
 180  
     int START_TAG = 2;
 181  
 
 182  
     /**
 183  
      * Returned from getEventType(), <a href="#next()">next()</a>, or <a href="#nextToken()">nextToken()</a> when an end
 184  
      * tag was read. The name of the end tag is available from getName(), its namespace and prefix are available from
 185  
      * getNamespace() and getPrefix().
 186  
      *
 187  
      * @see #next
 188  
      * @see #nextToken
 189  
      * @see #getName
 190  
      * @see #getPrefix
 191  
      * @see #getNamespace
 192  
      * @see #FEATURE_PROCESS_NAMESPACES
 193  
      */
 194  
     int END_TAG = 3;
 195  
 
 196  
     /**
 197  
      * Character data was read and is available by calling getText().
 198  
      * <p>
 199  
      * <strong>Please note:</strong> <a href="#next()">next()</a> will accumulate multiple events into one TEXT event,
 200  
      * skipping IGNORABLE_WHITESPACE, PROCESSING_INSTRUCTION and COMMENT events. In contrast,
 201  
      * <a href="#nextToken()">nextToken()</a> will stop reading text when any other event is observed. Also, when the
 202  
      * state was reached by calling next(), the text value will be normalized, whereas getText() will return
 203  
      * unnormalized content in the case of nextToken(). This allows an exact roundtrip without changing line ends when
 204  
      * examining low level events, whereas for high level applications the text is normalized appropriately.
 205  
      *
 206  
      * @see #next
 207  
      * @see #nextToken
 208  
      * @see #getText
 209  
      */
 210  
     int TEXT = 4;
 211  
 
 212  
     // ----------------------------------------------------------------------------
 213  
     // additional events exposed by lower level nextToken()
 214  
 
 215  
     /**
 216  
      * A CDATA section was just read; this token is available only from calls to
 217  
      * <a href="#nextToken()">nextToken()</a>. A call to next() will accumulate various text events into a single event
 218  
      * of type TEXT. The text contained in the CDATA section is available by calling getText().
 219  
      *
 220  
      * @see #nextToken
 221  
      * @see #getText
 222  
      */
 223  
     int CDSECT = 5;
 224  
 
 225  
     /**
 226  
      * An entity reference was just read; this token is available from <a href="#nextToken()">nextToken()</a> only. The
 227  
      * entity name is available by calling getName(). If available, the replacement text can be obtained by calling
 228  
      * getText(); otherwise, the user is responsible for resolving the entity reference. This event type is never
 229  
      * returned from next(); next() will accumulate the replacement text and other text events to a single TEXT event.
 230  
      *
 231  
      * @see #nextToken
 232  
      * @see #getText
 233  
      */
 234  
     int ENTITY_REF = 6;
 235  
 
 236  
     /**
 237  
      * Ignorable whitespace was just read. This token is available only from <a href="#nextToken()">nextToken()</a>.
 238  
      * For non-validating parsers, this event is only reported by nextToken() when outside the root element. Validating
 239  
      * parsers may be able to detect ignorable whitespace at other locations. The ignorable whitespace string is
 240  
      * available by calling getText().
 241  
      * <p>
 242  
      * <strong>NOTE:</strong> this is different from calling the isWhitespace() method, since text content may be
 243  
      * whitespace but not ignorable. Ignorable whitespace is skipped by next() automatically; this event type is never
 244  
      * returned from next().
 245  
      *
 246  
      * @see #nextToken
 247  
      * @see #getText
 248  
      */
 249  
     int IGNORABLE_WHITESPACE = 7;
 250  
 
 251  
     /**
 252  
      * An XML processing instruction declaration was just read. This event type is available only via
 253  
      * <a href="#nextToken()">nextToken()</a>. getText() will return text that is inside the processing instruction.
 254  
      * Calls to next() will skip processing instructions automatically.
 255  
      * 
 256  
      * @see #nextToken
 257  
      * @see #getText
 258  
      */
 259  
     int PROCESSING_INSTRUCTION = 8;
 260  
 
 261  
     /**
 262  
      * An XML comment was just read. This token is available via
 263  
      * <a href="#nextToken()">nextToken()</a> only; calls to next() will skip comments automatically. The content of the
 264  
      * comment can be accessed using the getText() method.
 265  
      *
 266  
      * @see #nextToken
 267  
      * @see #getText
 268  
      */
 269  
     int COMMENT = 9;
 270  
 
 271  
     /**
 272  
      * An XML document type declaration was just read. This token is available from
 273  
      * <a href="#nextToken()">nextToken()</a> only. The unparsed text inside the doctype is available via the getText()
 274  
      * method.
 275  
      *
 276  
      * @see #nextToken
 277  
      * @see #getText
 278  
      */
 279  
     int DOCDECL = 10;
 280  
 
 281  
     /**
 282  
      * This array can be used to convert the event type integer constants such as START_TAG or TEXT to a string. For
 283  
      * example, the value of TYPES[START_TAG] is the string "START_TAG". This array is intended for diagnostic output
 284  
      * only. Relying on the contents of the array may be dangerous since malicious applications may alter the array,
 285  
      * although it is final, due to limitations of the Java language.
 286  
      */
 287  
     String[] TYPES = { "START_DOCUMENT", "END_DOCUMENT", "START_TAG", "END_TAG", "TEXT", "CDSECT", "ENTITY_REF",
 288  
         "IGNORABLE_WHITESPACE", "PROCESSING_INSTRUCTION", "COMMENT", "DOCDECL" };
 289  
 
 290  
     // ----------------------------------------------------------------------------
 291  
     // namespace related features
 292  
 
 293  
     /**
 294  
      * This feature determines whether the parser processes namespaces. As for all features, the default value is false.
 295  
      * <p>
 296  
      * <strong>NOTE:</strong> The value cannot be changed during parsing and must be set before parsing.
 297  
      *
 298  
      * @see #getFeature
 299  
      * @see #setFeature
 300  
      */
 301  
     String FEATURE_PROCESS_NAMESPACES = "http://xmlpull.org/v1/doc/features.html#process-namespaces";
 302  
 
 303  
     /**
 304  
      * This feature determines whether namespace attributes are exposed via the attribute access methods. Like all
 305  
      * features, the default value is false. This feature cannot be changed during parsing.
 306  
      *
 307  
      * @see #getFeature
 308  
      * @see #setFeature
 309  
      */
 310  
     String FEATURE_REPORT_NAMESPACE_ATTRIBUTES = "http://xmlpull.org/v1/doc/features.html#report-namespace-prefixes";
 311  
 
 312  
     /**
 313  
      * This feature determines whether the document declaration is processed. If set to false, the DOCDECL event type is
 314  
      * reported by nextToken() and ignored by next(). If this feature is activated, then the document declaration must
 315  
      * be processed by the parser.
 316  
      * <p>
 317  
      * <strong>Please note:</strong> If the document type declaration was ignored, entity references may cause
 318  
      * exceptions later in the parsing process. The default value of this feature is false. It cannot be changed during
 319  
      * parsing.
 320  
      *
 321  
      * @see #getFeature
 322  
      * @see #setFeature
 323  
      */
 324  
     String FEATURE_PROCESS_DOCDECL = "http://xmlpull.org/v1/doc/features.html#process-docdecl";
 325  
 
 326  
     /**
 327  
      * If this feature is activated, all validation errors as defined in the XML 1.0 specification are reported. This
 328  
      * implies that FEATURE_PROCESS_DOCDECL is true and both, the internal and external document type declaration will
 329  
      * be processed.
 330  
      * <p>
 331  
      * <strong>Please Note:</strong> This feature can not be changed during parsing. The default value is false.
 332  
      *
 333  
      * @see #getFeature
 334  
      * @see #setFeature
 335  
      */
 336  
     String FEATURE_VALIDATION = "http://xmlpull.org/v1/doc/features.html#validation";
 337  
 
 338  
     /**
 339  
      * Use this call to change the general behaviour of the parser, such as namespace processing or doctype declaration
 340  
      * handling. This method must be called before the first call to next or nextToken. Otherwise, an exception is
 341  
      * thrown.
 342  
      * <p>
 343  
      * Example: call setFeature(FEATURE_PROCESS_NAMESPACES, true) in order to switch on namespace processing. The
 344  
      * initial settings correspond to the properties requested from the XML Pull Parser factory. If none were requested,
 345  
      * all features are deactivated by default.
 346  
      *
 347  
      * @exception XmlPullParserException If the feature is not supported or can not be set
 348  
      * @exception IllegalArgumentException If string with the feature name is null
 349  
      */
 350  
     void setFeature( String name, boolean state )
 351  
         throws XmlPullParserException;
 352  
 
 353  
     /**
 354  
      * Returns the current value of the given feature.
 355  
      * <p>
 356  
      * <strong>Please note:</strong> unknown features are <strong>always</strong> returned as false.
 357  
      *
 358  
      * @param name The name of feature to be retrieved.
 359  
      * @return The value of the feature.
 360  
      * @exception IllegalArgumentException If string with the feature name is null
 361  
      */
 362  
 
 363  
     boolean getFeature( String name );
 364  
 
 365  
     /**
 366  
      * Set the value of a property. The property name is any fully-qualified URI.
 367  
      *
 368  
      * @exception XmlPullParserException If the property is not supported or can not be set
 369  
      * @exception IllegalArgumentException If string with the property name is null
 370  
      */
 371  
     void setProperty( String name, Object value )
 372  
         throws XmlPullParserException;
 373  
 
 374  
     /**
 375  
      * Look up the value of a property. The property name is any fully-qualified URI.
 376  
      * <p>
 377  
      * <strong>NOTE:</strong> unknown properties are <strong>always</strong> returned as null.
 378  
      *
 379  
      * @param name The name of property to be retrieved.
 380  
      * @return The value of named property.
 381  
      */
 382  
     Object getProperty( String name );
 383  
 
 384  
     /**
 385  
      * Set the input source for parser to the given reader and resets the parser. The event type is set to the initial
 386  
      * value START_DOCUMENT. Setting the reader to null will just stop parsing and reset parser state, allowing the
 387  
      * parser to free internal resources such as parsing buffers.
 388  
      */
 389  
     void setInput( Reader in )
 390  
         throws XmlPullParserException;
 391  
 
 392  
     /**
 393  
      * Sets the input stream the parser is going to process. This call resets the parser state and sets the event type
 394  
      * to the initial value START_DOCUMENT.
 395  
      * <p>
 396  
      * <strong>NOTE:</strong> If an input encoding string is passed, it MUST be used. Otherwise, if inputEncoding is
 397  
      * null, the parser SHOULD try to determine input encoding following XML 1.0 specification (see below). If encoding
 398  
      * detection is supported then following feature <a href=
 399  
      * "http://xmlpull.org/v1/doc/features.html#detect-encoding">http://xmlpull.org/v1/doc/features.html#detect-encoding</a>
 400  
      * MUST be true and otherwise it must be false
 401  
      *
 402  
      * @param inputStream contains a raw byte input stream of possibly unknown encoding (when inputEncoding is null).
 403  
      * @param inputEncoding if not null it MUST be used as encoding for inputStream
 404  
      */
 405  
     void setInput( InputStream inputStream, String inputEncoding )
 406  
         throws XmlPullParserException;
 407  
 
 408  
     /**
 409  
      * Returns the input encoding if known, null otherwise. If setInput(InputStream, inputEncoding) was called with an
 410  
      * inputEncoding value other than null, this value must be returned from this method. Otherwise, if inputEncoding is
 411  
      * null and the parser supports the encoding detection feature
 412  
      * (http://xmlpull.org/v1/doc/features.html#detect-encoding), it must return the detected encoding. If
 413  
      * setInput(Reader) was called, null is returned. After first call to next if XML declaration was present this
 414  
      * method will return encoding declared.
 415  
      */
 416  
     String getInputEncoding();
 417  
 
 418  
     /**
 419  
      * Set new value for entity replacement text as defined in
 420  
      * <a href="http://www.w3.org/TR/REC-xml#intern-replacement">XML 1.0 Section 4.5 Construction of Internal Entity
 421  
      * Replacement Text</a>. If FEATURE_PROCESS_DOCDECL or FEATURE_VALIDATION are set, calling this function will result
 422  
      * in an exception -- when processing of DOCDECL is enabled, there is no need to set the entity replacement text
 423  
      * manually.
 424  
      * <p>
 425  
      * The motivation for this function is to allow very small implementations of XMLPULL that will work in J2ME
 426  
      * environments. Though these implementations may not be able to process the document type declaration, they still
 427  
      * can work with known DTDs by using this function.
 428  
      * <p>
 429  
      * <b>Please note:</b> The given value is used literally as replacement text and it corresponds to declaring entity
 430  
      * in DTD that has all special characters escaped: left angle bracket is replaced with &amp;lt;, ampersand with
 431  
      * &amp;amp; and so on.
 432  
      * <p>
 433  
      * <b>Note:</b> The given value is the literal replacement text and must not contain any other entity reference (if
 434  
      * it contains any entity reference there will be no further replacement).
 435  
      * <p>
 436  
      * <b>Note:</b> The list of pre-defined entity names will always contain standard XML entities such as amp
 437  
      * (&amp;amp;), lt (&amp;lt;), gt (&amp;gt;), quot (&amp;quot;), and apos (&amp;apos;). Those cannot be redefined by
 438  
      * this method!
 439  
      *
 440  
      * @see #setInput
 441  
      * @see #FEATURE_PROCESS_DOCDECL
 442  
      * @see #FEATURE_VALIDATION
 443  
      */
 444  
     void defineEntityReplacementText( String entityName, String replacementText )
 445  
         throws XmlPullParserException;
 446  
 
 447  
     /**
 448  
      * Returns the number of elements in the namespace stack for the given depth. If namespaces are not enabled, 0 is
 449  
      * returned.
 450  
      * <p>
 451  
      * <b>NOTE:</b> when parser is on END_TAG then it is allowed to call this function with getDepth()+1 argument to
 452  
      * retrieve position of namespace prefixes and URIs that were declared on corresponding START_TAG.
 453  
      * <p>
 454  
      * <b>NOTE:</b> to retrieve list of namespaces declared in current element:
 455  
      * 
 456  
      * <pre>
 457  
      *       XmlPullParser pp = ...
 458  
      *       int nsStart = pp.getNamespaceCount(pp.getDepth()-1);
 459  
      *       int nsEnd = pp.getNamespaceCount(pp.getDepth());
 460  
      *       for (int i = nsStart; i < nsEnd; i++) {
 461  
      *          String prefix = pp.getNamespacePrefix(i);
 462  
      *          String ns = pp.getNamespaceUri(i);
 463  
      *           // ...
 464  
      *      }
 465  
      * </pre>
 466  
      *
 467  
      * @see #getNamespacePrefix
 468  
      * @see #getNamespaceUri
 469  
      * @see #getNamespace()
 470  
      * @see #getNamespace(String)
 471  
      */
 472  
     int getNamespaceCount( int depth )
 473  
         throws XmlPullParserException;
 474  
 
 475  
     /**
 476  
      * Returns the namespace prefix for the given position in the namespace stack. Default namespace declaration
 477  
      * (xmlns='...') will have null as prefix. If the given index is out of range, an exception is thrown.
 478  
      * <p>
 479  
      * <b>Please note:</b> when the parser is on an END_TAG, namespace prefixes that were declared in the corresponding
 480  
      * START_TAG are still accessible although they are no longer in scope.
 481  
      */
 482  
     String getNamespacePrefix( int pos )
 483  
         throws XmlPullParserException;
 484  
 
 485  
     /**
 486  
      * Returns the namespace URI for the given position in the namespace stack. If the position is out of range, an
 487  
      * exception is thrown.
 488  
      * <p>
 489  
      * <b>NOTE:</b> when parser is on END_TAG then namespace prefixes that were declared in corresponding START_TAG are
 490  
      * still accessible even though they are not in scope
 491  
      */
 492  
     String getNamespaceUri( int pos )
 493  
         throws XmlPullParserException;
 494  
 
 495  
     /**
 496  
      * Returns the URI corresponding to the given prefix, depending on current state of the parser.
 497  
      * <p>
 498  
      * If the prefix was not declared in the current scope, null is returned. The default namespace is included in the
 499  
      * namespace table and is available via getNamespace (null).
 500  
      * <p>
 501  
      * This method is a convenience method for
 502  
      *
 503  
      * <pre>
 504  
      * for ( int i = getNamespaceCount( getDepth() ) - 1; i >= 0; i-- )
 505  
      * {
 506  
      *     if ( getNamespacePrefix( i ).equals( prefix ) )
 507  
      *     {
 508  
      *         return getNamespaceUri( i );
 509  
      *     }
 510  
      * }
 511  
      * return null;
 512  
      * </pre>
 513  
      * <p>
 514  
      * <strong>Please note:</strong> parser implementations may provide more efficient lookup, e.g. using a Hashtable.
 515  
      * The 'xml' prefix is bound to "http://www.w3.org/XML/1998/namespace", as defined in the
 516  
      * <a href="http://www.w3.org/TR/REC-xml-names/#ns-using">Namespaces in XML</a> specification. Analogously, the
 517  
      * 'xmlns' prefix is resolved to <a href="http://www.w3.org/2000/xmlns/">http://www.w3.org/2000/xmlns/</a>
 518  
      *
 519  
      * @see #getNamespaceCount
 520  
      * @see #getNamespacePrefix
 521  
      * @see #getNamespaceUri
 522  
      */
 523  
     String getNamespace( String prefix );
 524  
 
 525  
     // --------------------------------------------------------------------------
 526  
     // miscellaneous reporting methods
 527  
 
 528  
     /**
 529  
      * Returns the current depth of the element. Outside the root element, the depth is 0. The depth is incremented by 1
 530  
      * when a start tag is reached. The depth is decremented AFTER the end tag event was observed.
 531  
      *
 532  
      * <pre>
 533  
      * &lt;!-- outside --&gt;     0
 534  
      * &lt;root>                  1
 535  
      *   sometext                 1
 536  
      *     &lt;foobar&gt;         2
 537  
      *     &lt;/foobar&gt;        2
 538  
      * &lt;/root&gt;              1
 539  
      * &lt;!-- outside --&gt;     0
 540  
      * </pre>
 541  
      */
 542  
     int getDepth();
 543  
 
 544  
     /**
 545  
      * Returns a short text describing the current parser state, including the position, a description of the current
 546  
      * event and the data source if known. This method is especially useful to provide meaningful error messages and for
 547  
      * debugging purposes.
 548  
      */
 549  
     String getPositionDescription();
 550  
 
 551  
     /**
 552  
      * Returns the current line number, starting from 1. When the parser does not know the current line number or can
 553  
      * not determine it, -1 is returned (e.g. for WBXML).
 554  
      *
 555  
      * @return current line number or -1 if unknown.
 556  
      */
 557  
     int getLineNumber();
 558  
 
 559  
     /**
 560  
      * Returns the current column number, starting from 0. When the parser does not know the current column number or
 561  
      * can not determine it, -1 is returned (e.g. for WBXML).
 562  
      *
 563  
      * @return current column number or -1 if unknown.
 564  
      */
 565  
     int getColumnNumber();
 566  
 
 567  
     // --------------------------------------------------------------------------
 568  
     // TEXT related methods
 569  
 
 570  
     /**
 571  
      * Checks whether the current TEXT event contains only whitespace characters. For IGNORABLE_WHITESPACE, this is
 572  
      * always true. For TEXT and CDSECT, false is returned when the current event text contains at least one non-white
 573  
      * space character. For any other event type an exception is thrown.
 574  
      * <p>
 575  
      * <b>Please note:</b> non-validating parsers are not able to distinguish whitespace and ignorable whitespace,
 576  
      * except from whitespace outside the root element. Ignorable whitespace is reported as separate event, which is
 577  
      * exposed via nextToken only.
 578  
      */
 579  
     boolean isWhitespace()
 580  
         throws XmlPullParserException;
 581  
 
 582  
     /**
 583  
      * Returns the text content of the current event as String. The value returned depends on current event type, for
 584  
      * example for TEXT event it is element content (this is typical case when next() is used). See description of
 585  
      * nextToken() for detailed description of possible returned values for different types of events.
 586  
      * <p>
 587  
      * <strong>NOTE:</strong> in case of ENTITY_REF, this method returns the entity replacement text (or null if not
 588  
      * available). This is the only case where getText() and getTextCharacters() return different values.
 589  
      *
 590  
      * @see #getEventType
 591  
      * @see #next
 592  
      * @see #nextToken
 593  
      */
 594  
     String getText();
 595  
 
 596  
     /**
 597  
      * Returns the buffer that contains the text of the current event, as well as the start offset and length relevant
 598  
      * for the current event. See getText(), next() and nextToken() for description of possible returned values.
 599  
      * <p>
 600  
      * <strong>Please note:</strong> this buffer must not be modified and its content MAY change after a call to next()
 601  
      * or nextToken(). This method will always return the same value as getText(), except for ENTITY_REF. In the case of
 602  
      * ENTITY_REF, getText() returns the replacement text and this method returns the actual input buffer containing the
 603  
      * entity name. If getText() returns null, this method returns null as well and the values returned in the holder
 604  
      * array MUST be -1 (both start and length).
 605  
      *
 606  
      * @see #getText
 607  
      * @see #next
 608  
      * @see #nextToken
 609  
      * @param holderForStartAndLength Must hold a 2-element int array into which the start offset and length values
 610  
      *            will be written.
 611  
      * @return char buffer that contains the text of the current event (null if the current event has no text
 612  
      *         associated).
 613  
      */
 614  
     char[] getTextCharacters( int[] holderForStartAndLength );
 615  
 
 616  
     // --------------------------------------------------------------------------
 617  
     // START_TAG / END_TAG shared methods
 618  
 
 619  
     /**
 620  
      * Returns the namespace URI of the current element. The default namespace is represented as an empty string. If
 621  
      * namespaces are not enabled, an empty String ("") is always returned. The current event must be START_TAG or
 622  
      * END_TAG; otherwise, null is returned.
      *
      * @return namespace URI of the current element, or null if the current event is neither START_TAG nor END_TAG
 623  
      */
 624  
     String getNamespace();
 625  
 
 626  
     /**
 627  
      * For START_TAG or END_TAG events, the (local) name of the current element is returned when namespaces are enabled.
 628  
      * When namespace processing is disabled, the raw name is returned. For ENTITY_REF events, the entity name is
 629  
      * returned. If the current event is not START_TAG, END_TAG, or ENTITY_REF, null is returned.
 630  
      * <p>
 631  
      * <b>Please note:</b> To reconstruct the raw element name when namespaces are enabled and the prefix is not null,
 632  
      * you will need to add the prefix and a colon to localName.
      *
      * @return element name (START_TAG, END_TAG) or entity name (ENTITY_REF); null for any other event type
 633  
      */
 634  
     String getName();
 635  
 
 636  
     /**
 637  
      * Returns the prefix of the current element. If the element is in the default namespace (has no prefix), null is
 638  
      * returned. If namespaces are not enabled, or the current event is not START_TAG or END_TAG, null is returned.
      *
      * @return prefix of the current element, or null in any of the cases described above
 639  
      */
 640  
     String getPrefix();
 641  
 
 642  
     /**
 643  
      * Returns true if the current event is START_TAG and the tag is degenerated (e.g. &lt;foobar/&gt;).
 644  
      * <p>
 645  
      * <b>NOTE:</b> if the parser is not on START_TAG, an exception will be thrown.
      *
      * @return true if the current START_TAG is degenerated (e.g. &lt;foobar/&gt;)
      * @throws XmlPullParserException if the parser is not on START_TAG
 646  
      */
 647  
     boolean isEmptyElementTag()
 648  
         throws XmlPullParserException;
 649  
 
 650  
     // --------------------------------------------------------------------------
 651  
     // START_TAG Attributes retrieval methods
 652  
 
 653  
     /**
 654  
      * Returns the number of attributes of the current start tag, or -1 if the current event type is not START_TAG.
 655  
      *
 656  
      * @see #getAttributeNamespace
 657  
      * @see #getAttributeName
 658  
      * @see #getAttributePrefix
 659  
      * @see #getAttributeValue
      * @return number of attributes of the current start tag, or -1 if the current event type is not START_TAG
 660  
      */
 661  
     int getAttributeCount();
 662  
 
 663  
     /**
 664  
      * Returns the namespace URI of the attribute with the given index (starts from 0). Returns an empty string ("") if
 665  
      * namespaces are not enabled or the attribute has no namespace. Throws an IndexOutOfBoundsException if the index is
 666  
      * out of range or the current event type is not START_TAG.
 667  
      * <p>
 668  
      * <strong>NOTE:</strong> if FEATURE_REPORT_NAMESPACE_ATTRIBUTES is set then namespace attributes (xmlns:ns='...')
 669  
      * must be reported with namespace <a href="http://www.w3.org/2000/xmlns/">http://www.w3.org/2000/xmlns/</a> (visit
 670  
      * this URL for description!). The default namespace attribute (xmlns="...") will be reported with empty namespace.
 671  
      * <p>
 672  
      * <strong>NOTE:</strong> The xml prefix is bound as defined in the
 673  
      * <a href="http://www.w3.org/TR/REC-xml-names/#ns-using">Namespaces in XML</a> specification to
 674  
      * "http://www.w3.org/XML/1998/namespace".
 675  
      *
 676  
      * @param index zero based index of attribute
 677  
      * @return attribute namespace, empty string ("") is returned if namespaces processing is not enabled or namespaces
 678  
      *         processing is enabled but attribute has no namespace (it has no prefix).
      * @throws IndexOutOfBoundsException if the index is out of range or the current event type is not START_TAG
 679  
      */
 680  
     String getAttributeNamespace( int index );
 681  
 
 682  
     /**
 683  
      * Returns the local name of the specified attribute if namespaces are enabled, or just the attribute name if
 684  
      * namespaces are disabled. Throws an IndexOutOfBoundsException if the index is out of range or the current event
 685  
      * type is not START_TAG.
 686  
      *
 687  
      * @param index zero based index of attribute
 688  
      * @return attribute name (null is never returned)
      * @throws IndexOutOfBoundsException if the index is out of range or the current event type is not START_TAG
 689  
      */
 690  
     String getAttributeName( int index );
 691  
 
 692  
     /**
 693  
      * Returns the prefix of the specified attribute. Returns null if the attribute has no prefix. If namespaces are
 694  
      * disabled it will always return null. Throws an IndexOutOfBoundsException if the index is out of range or current
 695  
      * event type is not START_TAG.
 696  
      *
 697  
      * @param index zero based index of attribute
 698  
      * @return attribute prefix, or null if the attribute has no prefix or namespaces processing is not enabled.
      * @throws IndexOutOfBoundsException if the index is out of range or the current event type is not START_TAG
 699  
      */
 700  
     String getAttributePrefix( int index );
 701  
 
 702  
     /**
 703  
      * Returns the type of the specified attribute. If the parser is non-validating it MUST return CDATA.
 704  
      *
 705  
      * @param index zero based index of attribute
 706  
      * @return attribute type (null is never returned)
 707  
      */
 708  
     String getAttributeType( int index );
 709  
 
 710  
     /**
 711  
      * Returns true if the specified attribute was not in the input but was declared in XML. If the parser is
 712  
      * non-validating it MUST always return false. This information is part of the XML infoset.
 713  
      *
 714  
      * @param index zero based index of attribute
 715  
      * @return false if attribute was in input
 716  
      */
 717  
     boolean isAttributeDefault( int index );
 718  
 
 719  
     /**
 720  
      * Returns the value of the attribute with the given index. Throws an IndexOutOfBoundsException if the index is out
 721  
      * of range or the current event type is not START_TAG.
 722  
      * <p>
 723  
      * <strong>NOTE:</strong> attribute value must be normalized (including entity replacement text if PROCESS_DOCDECL
 724  
      * is false) as described in <a href="http://www.w3.org/TR/REC-xml#AVNormalize">XML 1.0 section 3.3.3
 725  
      * Attribute-Value Normalization</a>
 726  
      *
 727  
      * @see #defineEntityReplacementText
 728  
      * @param index zero based index of attribute
 729  
      * @return value of attribute (null is never returned)
      * @throws IndexOutOfBoundsException if the index is out of range or the current event type is not START_TAG
 730  
      */
 731  
     String getAttributeValue( int index );
 732  
 
 733  
     /**
 734  
      * Returns the value of the attribute identified by namespace URI and local name. If namespaces are disabled,
 735  
      * namespace must be null. If the current event type is not START_TAG then IndexOutOfBoundsException will be thrown.
 736  
      * <p>
 737  
      * <strong>NOTE:</strong> attribute value must be normalized (including entity replacement text if PROCESS_DOCDECL
 738  
      * is false) as described in <a href="http://www.w3.org/TR/REC-xml#AVNormalize">XML 1.0 section 3.3.3
 739  
      * Attribute-Value Normalization</a>
 740  
      *
 741  
      * @see #defineEntityReplacementText
 742  
      * @param namespace Namespace of the attribute if namespaces are enabled otherwise must be null
 743  
      * @param name If namespaces enabled local name of attribute otherwise just attribute name
 744  
      * @return value of attribute or null if attribute with given name does not exist
      * @throws IndexOutOfBoundsException if the current event type is not START_TAG
 745  
      */
 746  
     String getAttributeValue( String namespace, String name );
 747  
 
 748  
     // --------------------------------------------------------------------------
 749  
     // actual parsing methods
 750  
 
 751  
     /**
 752  
      * Returns the type of the current event (START_TAG, END_TAG, TEXT, etc.).
 753  
      *
 754  
      * @see #next()
 755  
      * @see #nextToken()
      * @return type of the current event (START_TAG, END_TAG, TEXT, etc.)
 756  
      */
 757  
     int getEventType()
 758  
         throws XmlPullParserException;
 759  
 
 760  
     /**
 761  
      * Get next parsing event - element content will be coalesced and only one TEXT event must be returned for whole
 762  
      * element content (comments and processing instructions will be ignored and entity references must be expanded or
 763  
      * exception must be thrown if entity reference can not be expanded). If element content is empty (content is "")
 764  
      * then no TEXT event will be reported.
 765  
      * <p>
 766  
      * <b>NOTE:</b> empty element (such as &lt;tag/>) will be reported with two separate events: START_TAG, END_TAG - it
 767  
      * must be so to preserve parsing equivalency of empty element to &lt;tag>&lt;/tag>. (see isEmptyElementTag ())
 768  
      *
 769  
      * @see #isEmptyElementTag
 770  
      * @see #START_TAG
 771  
      * @see #TEXT
 772  
      * @see #END_TAG
 773  
      * @see #END_DOCUMENT
      * @return type of the next parsing event (see the event constants referenced above)
 774  
      */
 775  
 
 776  
     int next()
 777  
         throws XmlPullParserException, IOException;
 778  
 
 779  
     /**
 780  
      * This method works similarly to next() but will expose additional event types (COMMENT, CDSECT, DOCDECL,
 781  
      * ENTITY_REF, PROCESSING_INSTRUCTION, or IGNORABLE_WHITESPACE) if they are available in input.
 782  
      * <p>
 783  
      * If special feature <a href="http://xmlpull.org/v1/doc/features.html#xml-roundtrip">FEATURE_XML_ROUNDTRIP</a>
 784  
      * (identified by URI: http://xmlpull.org/v1/doc/features.html#xml-roundtrip) is enabled it is possible to do XML
 785  
      * document round trip ie. reproduce exactly on output the XML input using getText(): returned content is always
 786  
      * unnormalized (exactly as in input). Otherwise returned content is end-of-line normalized as described in
 787  
      * <a href="http://www.w3.org/TR/REC-xml#sec-line-ends">XML 1.0 End-of-Line Handling</a>. Also, when this feature
 788  
      * is enabled exact content of START_TAG, END_TAG, DOCDECL and PROCESSING_INSTRUCTION is available.
 789  
      * <p>
 790  
      * Here is the list of tokens that can be returned from nextToken() and what getText() and getTextCharacters()
 791  
      * returns:
 792  
      * <dl>
 793  
      * <dt>START_DOCUMENT
 794  
      * <dd>null
 795  
      * <dt>END_DOCUMENT
 796  
      * <dd>null
 797  
      * <dt>START_TAG
 798  
      * <dd>null unless FEATURE_XML_ROUNDTRIP enabled and then returns XML tag, ex: &lt;tag attr='val'>
 799  
      * <dt>END_TAG
 800  
      * <dd>null unless FEATURE_XML_ROUNDTRIP is enabled and then returns XML tag, ex: &lt;/tag>
 801  
      * <dt>TEXT
 802  
      * <dd>return element content. <br>
 803  
      * Note: that element content may be delivered in multiple consecutive TEXT events.
 804  
      * <dt>IGNORABLE_WHITESPACE
 805  
      * <dd>return characters that are determined to be ignorable white space. If the FEATURE_XML_ROUNDTRIP is enabled
 806  
      * all whitespace content outside root element will always be reported as IGNORABLE_WHITESPACE otherwise reporting is
 807  
      * optional. <br>
 808  
      * Note: that element content may be delivered in multiple consecutive IGNORABLE_WHITESPACE events.
 809  
      * <dt>CDSECT
 810  
      * <dd>return text <em>inside</em> CDATA (ex. 'fo&lt;o' from &lt;![CDATA[fo&lt;o]]>)
 811  
      * <dt>PROCESSING_INSTRUCTION
 812  
      * <dd>if FEATURE_XML_ROUNDTRIP is true return exact PI content ex: 'pi foo' from &lt;?pi foo?> otherwise it may be
 813  
      * exact PI content or concatenation of PI target, space and data so for example for &lt;?target data?> string
 814  
      * &quot;target data&quot; may be returned if FEATURE_XML_ROUNDTRIP is false.
 815  
      * <dt>COMMENT
 816  
      * <dd>return comment content ex. 'foo bar' from &lt;!--foo bar-->
 817  
      * <dt>ENTITY_REF
 818  
      * <dd>getText() MUST return entity replacement text if PROCESS_DOCDECL is false otherwise getText() MAY return
 819  
      * null, additionally getTextCharacters() MUST return entity name (for example 'entity_name' for &amp;entity_name;).
 820  
      * <br>
 821  
      * <b>NOTE:</b> this is the only place where value returned from getText() and getTextCharacters() <b>are
 822  
      * different</b> <br>
 823  
      * <b>NOTE:</b> it is user responsibility to resolve entity reference if PROCESS_DOCDECL is false and there is no
 824  
      * entity replacement text set in defineEntityReplacementText() method (getText() will be null) <br>
 825  
      * <b>NOTE:</b> character entities (ex. &amp;#32;) and standard entities such as &amp;amp; &amp;lt; &amp;gt;
 826  
      * &amp;quot; &amp;apos; are reported as well and are <b>not</b> reported as TEXT tokens but as ENTITY_REF tokens!
 827  
      * This requirement is added to allow to do roundtrip of XML documents!
 828  
      * <dt>DOCDECL
 829  
      * <dd>if FEATURE_XML_ROUNDTRIP is true or PROCESS_DOCDECL is false then return what is inside of DOCDECL for
 830  
      * example it returns:
 831  
      * 
 832  
      * <pre>
 833  
      * &quot; titlepage SYSTEM "http://www.foo.bar/dtds/typo.dtd"
 834  
      * [&lt;!ENTITY % active.links "INCLUDE">]&quot;
 835  
      * </pre>
 836  
      * <p>
 837  
      * for input document that contained:
 838  
      * 
 839  
      * <pre>
 840  
      * &lt;!DOCTYPE titlepage SYSTEM "http://www.foo.bar/dtds/typo.dtd"
 841  
      * [&lt;!ENTITY % active.links "INCLUDE">]>
 842  
      * </pre>
 843  
      * 
 844  
      * otherwise if FEATURE_XML_ROUNDTRIP is false and PROCESS_DOCDECL is true then what is returned is undefined (it
 845  
      * may be even null)</dd>
 846  
      * </dl>
 847  
      * <p>
 848  
      * <strong>NOTE:</strong> there is no guarantee that there will be only one TEXT or IGNORABLE_WHITESPACE event from
 849  
      * nextToken() as parser may choose to deliver element content in multiple tokens (dividing element content into
 850  
      * chunks)
 851  
      * <p>
 852  
      * <strong>NOTE:</strong> whether the returned text of a token is end-of-line normalized depends on
 853  
      * FEATURE_XML_ROUNDTRIP.
 854  
      * <p>
 855  
      * <strong>NOTE:</strong> XMLDecl (&lt;?xml ...?&gt;) is not reported but its content is available through optional
 856  
      * properties (see class description above).
 857  
      *
 858  
      * @see #next
 859  
      * @see #START_TAG
 860  
      * @see #TEXT
 861  
      * @see #END_TAG
 862  
      * @see #END_DOCUMENT
 863  
      * @see #COMMENT
 864  
      * @see #DOCDECL
 865  
      * @see #PROCESSING_INSTRUCTION
 866  
      * @see #ENTITY_REF
 867  
      * @see #IGNORABLE_WHITESPACE
 868  
      */
 869  
     int nextToken()
 870  
         throws XmlPullParserException, IOException;
 871  
 
 872  
     // -----------------------------------------------------------------------------
 873  
     // utility methods to make XML parsing easier ...
 874  
 
 875  
     /**
 876  
      * Test if the current event is of the given type and if the namespace and name do match. null will match any
 877  
      * namespace and any name. If the test is not passed, an exception is thrown. The exception text indicates the
 878  
      * parser position, the expected event and the current event that is not meeting the requirement.
 879  
      * <p>
 880  
      * Essentially it does this
 881  
      * 
 882  
      * <pre>
 883  
      * if ( type != getEventType() || ( namespace != null &amp;&amp; !namespace.equals( getNamespace() ) )
 884  
      *     || ( name != null &amp;&amp; !name.equals( getName() ) ) )
 885  
      *     throw new XmlPullParserException( "expected " + TYPES[type] + getPositionDescription() );
 886  
      * </pre>
      *
      * @param type expected event type; must equal the value returned by getEventType()
      * @param namespace expected namespace, compared with getNamespace(); null matches any namespace
      * @param name expected name, compared with getName(); null matches any name
      * @throws XmlPullParserException if the current event does not meet the stated requirement
 887  
      */
 888  
     void require( int type, String namespace, String name )
 889  
         throws XmlPullParserException, IOException;
 890  
 
 891  
     /**
 892  
      * If current event is START_TAG then if next element is TEXT then element content is returned or if next event is
 893  
      * END_TAG then empty string is returned, otherwise exception is thrown. After calling this function successfully
 894  
      * parser will be positioned on END_TAG.
 895  
      * <p>
 896  
      * The motivation for this function is to allow to parse consistently both empty elements and elements that have
 897  
      * empty content, for example for input:
 898  
      * <ol>
 899  
      * <li>&lt;tag&gt;foo&lt;/tag&gt;
 900  
      * <li>&lt;tag&gt;&lt;/tag&gt; (which is equivalent to &lt;tag/&gt;)</ol> both inputs can be parsed with the same code:
 901  
      * 
 902  
      * <pre>
 903  
      *   p.nextTag();
 904  
      *   p.require(p.START_TAG, "", "tag");
 905  
      *   String content = p.nextText();
 906  
      *   p.require(p.END_TAG, "", "tag");
 907  
      * </pre>
 908  
      * 
 909  
      * This function together with nextTag make it very easy to parse XML that has no mixed content.
 910  
      * <p>
 911  
      * Essentially it does this
 912  
      * 
 913  
      * <pre>
 914  
      * if ( getEventType() != START_TAG )
 915  
      * {
 916  
      *     throw new XmlPullParserException( "parser must be on START_TAG to read next text", this, null );
 917  
      * }
 918  
      * int eventType = next();
 919  
      * if ( eventType == TEXT )
 920  
      * {
 921  
      *     String result = getText();
 922  
      *     eventType = next();
 923  
      *     if ( eventType != END_TAG )
 924  
      *     {
 925  
      *         throw new XmlPullParserException( "event TEXT it must be immediately followed by END_TAG", this, null );
 926  
      *     }
 927  
      *     return result;
 928  
      * }
 929  
      * else if ( eventType == END_TAG )
 930  
      * {
 931  
      *     return "";
 932  
      * }
 933  
      * else
 934  
      * {
 935  
      *     throw new XmlPullParserException( "parser must be on START_TAG or TEXT to read text", this, null );
 936  
      * }
 937  
      * </pre>
 938  
      */
 939  
     String nextText()
 940  
         throws XmlPullParserException, IOException;
 941  
 
 942  
     /**
 943  
      * Call next() and return event if it is START_TAG or END_TAG otherwise throw an exception. It will skip whitespace
 944  
      * TEXT before actual tag if any.
 945  
      * <p>
 946  
      * Essentially it does this
 947  
      * 
 948  
      * <pre>
 949  
      * int eventType = next();
 950  
      * if ( eventType == TEXT &amp;&amp; isWhitespace() )
 951  
      * { // skip whitespace
 952  
      *     eventType = next();
 953  
      * }
 954  
      * if ( eventType != START_TAG &amp;&amp; eventType != END_TAG )
 955  
      * {
 956  
      *     throw new XmlPullParserException( "expected start or end tag", this, null );
 957  
      * }
 958  
      * return eventType;
 959  
      * </pre>
      *
      * @return START_TAG or END_TAG, whichever event the parser ends up on
      * @throws XmlPullParserException if the event reached is neither START_TAG nor END_TAG
 960  
      */
 961  
     int nextTag()
 962  
         throws XmlPullParserException, IOException;
 963  
 
 964  
 }