/*
 * ============================================================================
 * Copyright © 2002-2021 by Thomas Thrien.
 * All Rights Reserved.
 * ============================================================================
 * Licensed to the public under the agreements of the GNU Lesser General Public
 * License, version 3.0 (the "License"). You may obtain a copy of the License at
 *
 *      http://www.gnu.org/licenses/lgpl.html
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
 * License for the specific language governing permissions and limitations
 * under the License.
 */

package org.tquadrat.foundation.xml.parse;

import static org.apiguardian.api.API.Status.STABLE;
import static org.tquadrat.foundation.lang.CommonConstants.UTF8;
import static org.tquadrat.foundation.lang.Objects.nonNull;
import static org.tquadrat.foundation.lang.Objects.requireNonNullArgument;

import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.net.URL;
import java.util.HashMap;
import java.util.Map;

import org.apiguardian.api.API;
import org.tquadrat.foundation.annotation.ClassVersion;
import org.xml.sax.Attributes;
import org.xml.sax.ContentHandler;
import org.xml.sax.DTDHandler;
import org.xml.sax.EntityResolver;
import org.xml.sax.ErrorHandler;
import org.xml.sax.InputSource;
import org.xml.sax.Locator;
import org.xml.sax.SAXException;
import org.xml.sax.SAXNotRecognizedException;
import org.xml.sax.SAXNotSupportedException;
import org.xml.sax.XMLReader;
import org.xml.sax.helpers.AttributesImpl;
import org.xml.sax.helpers.LocatorImpl;

/**
 *  This class is an abstract base implementation for an XMLReader. Use this
 *  class as a base class for SAX based parsers that will parse other formats
 *  than XML. This is quite useful in combination with XSLT. Refer to the
 *  description for the abstract method
 *  {@link #process(BufferedReader) process()}
 *  for a brief run-through how to use this class.
 *
 *  @see "'Java and XSLT' from Eric M. Burke, O'Reilly 2001"
 *
 *  @extauthor Thomas Thrien - thomas.thrien@tquadrat.org
 *  @version $Id: AbstractXMLReader.java 1030 2022-04-06 13:42:02Z tquadrat $
 *  @since 0.0.5
 *
 *  @UMLGraph.link
 */
@ClassVersion( sourceVersion = "$Id: AbstractXMLReader.java 1030 2022-04-06 13:42:02Z tquadrat $" )
@API( status = STABLE, since = "0.0.5" )
public abstract class AbstractXMLReader implements XMLReader
{
        /*-----------*\
    ====** Constants **========================================================
        \*-----------*/
    /**
     *  An empty attribute set.
     */
    public static final Attributes NO_ATTRIBUTES = new AttributesImpl();

        /*------------*\
    ====** Attributes **=======================================================
        \*------------*/
    /**
     *  The content handler that receives the events generated while the
     *  input is processed; may be {@code null}.
     */
    private ContentHandler m_ContentHandler;

    /**
     *  The DTD handler used by this XMLReader; usually {@code null}.
     */
    private DTDHandler m_DTDHandler;

    /**
     *  The name of the encoding that is used to decode byte based input
     *  sources. If {@code null}, UTF-8 will be used.
     */
    private String m_Encoding = null;

    /**
     *  The entity resolver that is used by this XMLReader. It is especially
     *  used by
     *  {@link #parse(InputSource) parse( InputSource )}.
     */
    private EntityResolver m_EntityResolver;

    /**
     *  The error handler that is used by this XMLReader.
     */
    private ErrorHandler m_ErrorHandler;

    /**
     *  The features that are supported by this XMLReader.
     */
    private final Map<String,Boolean> m_Features = new HashMap<>();

    /**
     *  The locator that is maintained by this XML reader.
     */
    private final LocatorImpl m_Locator = new LocatorImpl();

    /**
     *  The properties that are set for this XMLReader.
     */
    private final Map<String,Object> m_Properties = new HashMap<>();

        /*--------------*\
    ====** Constructors **=====================================================
        \*--------------*/
    /**
     *  The default constructor.
     */
    protected AbstractXMLReader()
    {
        //---* Passing null installs the default error handler *---------------
        setErrorHandler( null );
    }   //  AbstractXMLReader()

    /**
     *  Creates a new instance of AbstractXMLReader and assigns the given
     *  content handler.
     *
     *  @param  contentHandler  The content handler to use with this XMLReader.
     */
    protected AbstractXMLReader( final ContentHandler contentHandler )
    {
        this();

        setContentHandler( requireNonNullArgument( contentHandler, "contentHandler" ) );
    }   //  AbstractXMLReader()

        /*---------*\
    ====** Methods **==========================================================
        \*---------*/
    /**
     *  Creates a buffered reader from the given input source. The sources
     *  are tried in this sequence: the character stream, the byte stream,
     *  and finally the system id. A system id is first handed to the entity
     *  resolver, if there is one; if no resolver is set, or if the resolver
     *  returns {@code null}, the system id is taken as a fully qualified
     *  URL.
     *
     *  @param  input   The input source.
     *  @return The buffered reader for the input data.
     *  @throws IOException Unable to create a reader from the input source.
     *  @throws SAXException    Unable to interpret the data provided with the
     *      input source.
     */
    private BufferedReader createReader( final InputSource input ) throws IOException, SAXException
    {
        BufferedReader retValue = null;
        //noinspection IfStatementWithTooManyBranches
        if( nonNull( input.getCharacterStream() ) )
        {
            retValue = new BufferedReader( input.getCharacterStream() );
        }
        else if( nonNull( input.getByteStream() ) )
        {
            retValue = new BufferedReader( createStreamReader( input.getByteStream() ) );
        }
        else if( nonNull( input.getSystemId() ) )
        {
            /*
             * The SAX contract allows an entity resolver to return null in
             * order to request the default resolution; that case is treated
             * like the one where no resolver is set at all.
             */
            final var entityResolver = getEntityResolver();
            final var resolvedSource = nonNull( entityResolver )
                ? entityResolver.resolveEntity( input.getPublicId(), input.getSystemId() )
                : null;
            retValue = nonNull( resolvedSource )
                ? createReader( resolvedSource )
                : openSystemId( input.getSystemId() );
        }
        else
        {
            //---* Something is weird with this input source *-----------------
            throw new SAXException( "Invalid Input Source" );
        }

        //---* Done *----------------------------------------------------------
        return retValue;
    }   //  createReader()

    /**
     *  Wraps the given byte stream into a reader that decodes it with the
     *  configured encoding, or with UTF-8 if no encoding was set.
     *
     *  @param  stream  The byte stream.
     *  @return The decoding reader.
     *  @throws IOException The configured encoding is not supported.
     */
    private InputStreamReader createStreamReader( final InputStream stream ) throws IOException
    {
        final var retValue = nonNull( m_Encoding )
            ? new InputStreamReader( stream, m_Encoding )
            : new InputStreamReader( stream, UTF8 );

        //---* Done *----------------------------------------------------------
        return retValue;
    }   //  createStreamReader()

    /**
     *  Returns the current content handler.
     *
     *  @return A reference to the current content handler.
     */
    @Override
    public final ContentHandler getContentHandler() { return m_ContentHandler; }

    /**
     *  Returns the current DTD handler.
     *
     *  @return A reference to the current DTD handler.
     */
    @Override
    public final DTDHandler getDTDHandler() { return m_DTDHandler; }

    /**
     *  Returns the current entity resolver.
     *
     *  @return A reference to the current entity resolver.
     */
    @Override
    public final EntityResolver getEntityResolver() { return m_EntityResolver; }

    /**
     *  Returns the current error handler. It will never return
     *  {@code null}; in case no handler was set, a reference to an
     *  instance of
     *  {@link DefaultErrorHandler DefaultErrorHandler}
     *  will be returned.
     *
     *  @return A reference to the current error handler.
     */
    @Override
    public final ErrorHandler getErrorHandler() { return m_ErrorHandler; }

    /**
     *  {@inheritDoc}
     *  As this is not meant as a base for an <i>XML</i> parser, this
     *  implementation does not recognise the required namespaces. If this is
     *  needed, the derived class has to provide another implementation for
     *  {@code getFeature()}
     *  and
     *  {@link #setFeature(String, boolean) setFeature()}.
     *
     *  @param  name    The name of the feature.
     *  @return {@code true} if the feature is supported,
     *      {@code false} if not or if the name is unknown.
     *  @throws SAXNotRecognizedException   The feature value cannot be
     *      retrieved.
     *  @throws SAXNotSupportedException    The XMLReader recognizes the
     *      feature name but cannot determine its value at this time.
     */
    @Override
    public boolean getFeature( final String name ) throws SAXNotRecognizedException, SAXNotSupportedException
    {
        final var feature = m_Features.get( requireNonNullArgument( name, "name" ) );

        //---* An unknown feature is reported as not supported *---------------
        final var retValue = nonNull( feature ) && feature.booleanValue();

        //---* Done *----------------------------------------------------------
        return retValue;
    }   //  getFeature()

    /**
     *  Returns the value for the property with the given name. Usually, this
     *  property name is any fully-qualified URI. It is possible for an
     *  XMLReader to recognise a property name but temporarily be unable to
     *  return its value. Some property values may be available only in
     *  specific contexts, such as before, during, or after a parse.<br>
     *  <br>XMLReaders are not required to recognise any specific property
     *  names, though an initial core set is documented for SAX2. But even that
     *  is not supported by this specific implementation; if this is a
     *  requirement, a derived class has to provide its own implementation of
     *  {@code getProperty()}
     *  and
     *  {@link #setProperty(String, Object) setProperty()}.
     *
     *  @param  name    The property name, which is a fully-qualified URI.
     *  @return The current value of the property. If the name is not known,
     *      {@code null} will be returned instead of throwing an
     *      exception.
     *
     *  @throws SAXNotRecognizedException   The property value can't be
     *      retrieved.
     *  @throws SAXNotSupportedException    The XMLReader recognizes the
     *      property name but cannot determine its value at this time.
     *
     *  @see #setProperty(String, Object) setProperty()
     */
    @Override
    public Object getProperty( final String name ) throws SAXNotRecognizedException, SAXNotSupportedException
    {
        return m_Properties.get( requireNonNullArgument( name, "name" ) );
    }   //  getProperty()

    /**
     *  Returns a reference to the locator object provided by this base
     *  implementation.
     *
     *  @return The reference to the locator object.
     */
    protected final Locator getLocator() { return m_Locator; }

    /**
     *  Opens the resource that is identified by the given system id, taking
     *  the system id as a fully qualified URL.
     *
     *  @param  systemId    The system id.
     *  @return The buffered reader for the data behind the URL.
     *  @throws IOException The system id is not a valid URL, the stream
     *      cannot be opened, or the configured encoding is not supported.
     */
    private BufferedReader openSystemId( final String systemId ) throws IOException
    {
        final var url = new URL( systemId );
        final var stream = url.openStream();
        try
        {
            return new BufferedReader( createStreamReader( stream ) );
        }
        catch( final IOException | RuntimeException e )
        {
            /*
             * The stream was opened here and nobody else holds a reference
             * to it, so it has to be closed here when the reader cannot be
             * created.
             */
            try
            {
                stream.close();
            }
            catch( final IOException closeFailure )
            {
                e.addSuppressed( closeFailure );
            }
            throw e;
        }
    }   //  openSystemId()

    /**
     *  Parses an input data source.<br>
     *  <br>The application can use this method to instruct the XML reader to
     *  begin parsing a document from any valid input source (a character
     *  stream, a byte stream, or a URI).<br>
     *  <br>Applications may not invoke this method while a parse is in
     *  progress (they should create a new XMLReader instead for each nested
     *  document). Once a parse is complete, an application may reuse the same
     *  XMLReader object, possibly with a different input source. Configuration
     *  of the XMLReader object (such as handler bindings and values
     *  established for feature flags and properties) is unchanged by
     *  completion of a parse, unless the definition of that aspect of the
     *  configuration explicitly specifies other behavior (For example,
     *  feature flags or properties exposing characteristics of the document
     *  being parsed).<br>
     *  <br>During the parse, the XMLReader will provide information about the
     *  document through the registered event handlers.<br>
     *  <br>This method is synchronous: it will not return until parsing has
     *  ended. If a client application wants to terminate parsing early, it
     *  should throw an exception.<br>
     *  <br>This implementation calls
     *  {@link #process(BufferedReader) process()}
     *  which is the user provided implementation for the parser. The reader
     *  that is handed to {@code process()} is closed when that method
     *  returns.<br>
     *  <br>If no content handler is set, this method returns immediately
     *  without any error message; in that case, the input source is not
     *  touched at all.
     *
     *  @param  input   The input source for the top-level of the document.
     *  @throws IOException An IO exception from the parser, possibly from a
     *      byte stream or character stream supplied by the application.
     *  @throws SAXException    Any SAX exception, possibly wrapping another
     *      exception.
     *
     *  @see org.xml.sax.InputSource
     *  @see #parse(java.lang.String) parse( String )
     *  @see #setEntityResolver(EntityResolver) setEntityResolver()
     *  @see #setDTDHandler(DTDHandler) setDTDHandler()
     *  @see #setContentHandler(ContentHandler) setContentHandler()
     *  @see #setErrorHandler(ErrorHandler) setErrorHandler()
     */
    @Override
    public final void parse( final InputSource input ) throws IOException, SAXException
    {
        final var contentHandler = getContentHandler();
        if( nonNull( contentHandler ) )
        {
            //---* Sets the locator *------------------------------------------
            /*
             * m_Locator will never be null.
             */
            contentHandler.setDocumentLocator( m_Locator );

            //---* Obtain a reader from the input source *---------------------
            try( final var reader = createReader( requireNonNullArgument( input, "input" ) ) )
            {
                //---* Let someone else do the work *--------------------------
                process( reader );
            }
        }
    }   //  parse()

    /**
     *  Parses an XML document from a system identifier (URI).<br>
     *  <br>This method is a shortcut for the common case of reading a
     *  document from a system identifier. It is the exact equivalent of the
     *  following:<br>
     *  {@code parse( new InputSource( systemId ) );}<br>
     *  <br>If the system identifier is a URL, it must be fully resolved by
     *  the application before it is passed to the parser.
     *
     *  @param  systemId    The system identifier (URI).
     *  @throws IOException An IO exception from the parser, possibly from a
     *      byte stream or character stream supplied by the application.
     *  @throws SAXException    Any SAX exception, possibly wrapping another
     *      exception.
     *
     *  @see #parse(org.xml.sax.InputSource) parse( InputSource )
     */
    @Override
    public final void parse( final String systemId ) throws IOException, SAXException { parse( new InputSource( systemId ) ); }

    /**
     *  This method has to be implemented in order to perform the parsing. It
     *  will be called either from
     *  {@link #parse(String) parse( String )}
     *  or
     *  {@link #parse(InputSource) parse( InputSource )}.<br>
     *  <br>The implementation of this method should update the locator by
     *  appropriate calls to
     *  {@link #setPublicId(String) setPublicId()},
     *  {@link #setSystemId(String) setSystemId()},
     *  and
     *  {@link #setLocation(int, int) setLocation()} -
     *  if possible ...<br>
     *  <br>The input is not meant to be XML, so it is difficult to describe
     *  here how to parse the input. But assuming that the input stream is a
     *  Java properties file, the implementation for {@code process()} might
     *  look like this (SAX expects the empty string, not {@code null}, for
     *  an absent namespace URI or local name):
     *  <pre><code>
     *  protected void process( BufferedReader input ) throws IOException, SAXException
     *  {
     *      ContentHandler handler = getContentHandler();
     *
     *      //---* Load the properties *---------------------------------------
     *      ExtendedProperties properties = new ExtendedProperties();
     *      properties.load( input );
     *
     *      //---* Create the document *---------------------------------------
     *      handler.startDocument();
     *      handler.startElement( "", "", "properties", new AttributesImpl() );
     *
     *      //---* Process the properties *------------------------------------
     *      AttributesImpl attributes;
     *      char [] value;
     *      // Each property will be treated as a value with the key as its
     *      // attribute.
     *      for( String name : properties.stringPropertyNames() )
     *      {
     *          //---* Start the element *-------------------------------------
     *          attributes = new AttributesImpl();
     *          attributes.addAttribute( "", "", "name", "ID", name );
     *          handler.startElement( "", "", "property", attributes );
     *
     *          //---* The element contents *----------------------------------
     *          value = properties.getProperty( name ).toCharArray();
     *          handler.characters( value, 0, value.length );
     *
     *          //---* End the element *---------------------------------------
     *          handler.endElement( "", "", "property" );
     *      }
     *
     *      //---* Finish the document *---------------------------------------
     *      handler.endElement( "", "", "properties" );
     *      handler.endDocument();
     *  }   //  process()
     *  </code></pre>
     *
     *  @param  input   The input stream.
     *  @throws IOException Problems reading the input stream.
     *  @throws SAXException    Something has gone wrong.
     */
    protected abstract void process( BufferedReader input ) throws IOException, SAXException;

    /**
     *  Sets the content handler used by this XMLReader. It allows an
     *  application to register a content event handler.<br>
     *  <br>If the application does not register a content handler, all
     *  content events reported by the SAX parser will be silently
     *  ignored.<br>
     *  <br>Applications may register a new or different handler in the middle
     *  of a parse, and the SAX parser must begin using the new handler
     *  immediately.
     *
     *  @param  handler The content handler; may be {@code null}.
     */
    @Override
    public final void setContentHandler( final ContentHandler handler ) { m_ContentHandler = handler; }

    /**
     *  Sets the DTD handler used by this XMLReader. Allows an application to
     *  register a DTD event handler.<br>
     *  <br>If the application does not register a DTD handler, all DTD events
     *  reported by the SAX parser will be silently ignored.<br>
     *  <br>Applications may register a new or different handler in the middle
     *  of a parse, and the SAX parser must begin using the new handler
     *  immediately.
     *
     *  @param  handler The DTD handler; may be {@code null}.
     */
    @Override
    public final void setDTDHandler( final DTDHandler handler ) { m_DTDHandler = handler; }

    /**
     *  Sets the name of the encoding that is used to decode byte based input
     *  sources. {@code null} means that UTF-8 is used.
     *
     *  @param  encoding    The name of the encoding to use; may be
     *      {@code null}.
     */
    public final void setEncoding( final String encoding ) { m_Encoding = encoding; }

    /**
     *  Sets the entity resolver that is used by this XMLReader. Allows an
     *  application to register an entity resolver.<br>
     *  <br>If the application does not register an entity resolver, the
     *  XMLReader will perform its own default resolution.<br>
     *  <br>Applications may register a new or different resolver in the
     *  middle of a parse, and the SAX parser must begin using the new resolver
     *  immediately.
     *
     *  @param  resolver    The entity resolver; may be {@code null}.
     */
    @Override
    public final void setEntityResolver( final EntityResolver resolver ) { m_EntityResolver = resolver; }

    /**
     *  Sets the error handler that is used by this XMLReader. Allows an
     *  application to register an error event handler.<br>
     *  <br>If the application does not register an error handler, all error
     *  events reported by the SAX parser will be written to
     *  {@link System#err System.err}
     *  and otherwise silently ignored; however, normal processing may not
     *  continue. It is highly recommended that all SAX applications implement
     *  an error handler to avoid unexpected bugs.<br>
     *  <br>Applications may register a new or different handler in the middle
     *  of a parse, and the SAX parser must begin using the new handler
     *  immediately.
     *
     *  @param  handler The error handler; may be {@code null}, in which
     *      case the default error handler is installed.
     *
     *  @see DefaultErrorHandler
     */
    @Override
    public final void setErrorHandler( final ErrorHandler handler )
    {
        m_ErrorHandler = nonNull( handler ) ? handler : DefaultErrorHandler.INSTANCE;
    }   //  setErrorHandler()

    /**
     *  Sets the feature flag.
     *
     *  @param  name    The name of the feature.
     *  @param  value   {@code true} if the feature should be supported
     *      by this implementation, {@code false} if not.
     *  @throws SAXNotRecognizedException   The feature value cannot be
     *      assigned.
     *  @throws SAXNotSupportedException    The XMLReader recognises the
     *      feature name but cannot set the requested value.
     *
     *  @see #getFeature(String) getFeature()
     */
    @Override
    public void setFeature( final String name, final boolean value ) throws SAXNotRecognizedException, SAXNotSupportedException
    {
        m_Features.put( requireNonNullArgument( name, "name" ), Boolean.valueOf( value ) );
    }   //  setFeature()

    /**
     *  Sets the current location to the locator.
     *
     *  @param  lineNumber  The current line number.
     *  @param  columnNumber    The current column number.
     */
    protected final void setLocation( final int lineNumber, final int columnNumber )
    {
        m_Locator.setLineNumber( lineNumber );
        m_Locator.setColumnNumber( columnNumber );
    }   //  setLocation()

    /**
     *  Sets the value of a property. Usually, the property name is any
     *  fully-qualified URI. It is possible for an XMLReader to recognize a
     *  property name but to be unable to change the current value. Some
     *  property values may be immutable or mutable only in specific contexts,
     *  such as before, during, or after a parse.<br>
     *  <br>XMLReaders are not required to recognize setting any specific
     *  property names, though a core set is defined by SAX2. This
     *  implementation does not give that core set any special treatment; it
     *  just stores the given value under the given name.<br>
     *  <br>This method is also the standard mechanism for setting extended
     *  handlers.
     *
     *  @param  name    The property name, which is a fully-qualified URI.
     *  @param  value   The requested value for the property; {@code null}
     *      removes the property.
     *  @throws SAXNotRecognizedException   The property value can't be
     *      assigned or retrieved.
     *  @throws SAXNotSupportedException    The XMLReader recognises the
     *      property name but cannot set the requested value.
     *
     *  @see #getProperty(String) getProperty()
     */
    @Override
    public void setProperty( final String name, final Object value ) throws SAXNotRecognizedException, SAXNotSupportedException
    {
        requireNonNullArgument( name, "name" );
        if( nonNull( value ) )
        {
            m_Properties.put( name, value );
        }
        else
        {
            //---* A null value means that the property is not set *-----------
            m_Properties.remove( name );
        }
    }   //  setProperty()

    /**
     *  Sets the public id to the locator.
     *
     *  @param  publicId    The value for the public id; may be {@code null}.
     */
    protected final void setPublicId( final String publicId ) { m_Locator.setPublicId( publicId ); }

    /**
     *  Sets the system id to the locator.
     *
     *  @param  systemId    The value for the system id; may be {@code null}.
     */
    protected final void setSystemId( final String systemId ) { m_Locator.setSystemId( systemId ); }
}
//  class AbstractXMLReader

/*
 *  End of File
 */