001/* 002 * ============================================================================ 003 * Copyright © 2002-2023 by Thomas Thrien. 004 * All Rights Reserved. 005 * ============================================================================ 006 * Licensed to the public under the agreements of the GNU Lesser General Public 007 * License, version 3.0 (the "License"). You may obtain a copy of the License at 008 * 009 * http://www.gnu.org/licenses/lgpl.html 010 * 011 * Unless required by applicable law or agreed to in writing, software 012 * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT 013 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the 014 * License for the specific language governing permissions and limitations 015 * under the License. 016 */ 017 018package org.tquadrat.foundation.xml.parse; 019 020import static java.lang.String.format; 021import static org.apiguardian.api.API.Status.MAINTAINED; 022import static org.apiguardian.api.API.Status.STABLE; 023import static org.tquadrat.foundation.lang.CommonConstants.EMPTY_STRING; 024import static org.tquadrat.foundation.lang.Objects.isNull; 025import static org.tquadrat.foundation.lang.Objects.nonNull; 026import static org.tquadrat.foundation.lang.Objects.requireNonNullArgument; 027import static org.tquadrat.foundation.lang.Objects.requireNotEmptyArgument; 028import static org.tquadrat.foundation.util.StringUtils.isEmptyOrBlank; 029import static org.tquadrat.foundation.util.StringUtils.isNotEmptyOrBlank; 030 031import java.net.URI; 032import java.net.URISyntaxException; 033import java.util.HashMap; 034import java.util.Map; 035import java.util.Optional; 036import java.util.TreeMap; 037import java.util.stream.IntStream; 038 039import org.apiguardian.api.API; 040import org.tquadrat.foundation.annotation.ClassVersion; 041import org.tquadrat.foundation.annotation.MountPoint; 042import org.tquadrat.foundation.util.Stack; 043import org.xml.sax.Attributes; 044import org.xml.sax.ContentHandler; 045import org.xml.sax.Locator; 046import org.xml.sax.SAXException; 047import org.xml.sax.SAXParseException; 048import org.xml.sax.helpers.DefaultHandler; 049import org.xml.sax.helpers.LocatorImpl; 050 051/** 052 * <p>{@summary This class implements the interface 053 * {@link ContentHandler ContentHandler} 054 * as a base class for more advanced versions of the 055 * {@link DefaultHandler DefaultHandler class} 056 * or for stand-alone use.}</p> 057 * <p>Instead of implementing the three methods 058 * {@link org.xml.sax.helpers.DefaultHandler#characters(char[],int,int) characters()}, 059 * {@link org.xml.sax.helpers.DefaultHandler#endElement(String,String,String) endElement()}, 060 * and 061 * {@link org.xml.sax.helpers.DefaultHandler#startElement(String,String,String,Attributes) startElement()} 062 * only handlers for the elements have to implemented; after registration of 063 * these handlers using 064 * {@link #registerElementHandler(String, HandlerMethod)} 065 * these handler methods will be called automatically by the default 066 * implementations of 067 * {@link #processElement(Element) processElement()} 068 * and 069 * {@link #openElement(Element) openElement()}.</p> 070 * <p>These method can still be overwritten if a different processing is 071 * desired. When 072 * {@link #processElement(Element) processElement()} 073 * is called after the element is terminated, the attributes together with the 074 * character data after closing the element is provided. The method 075 * {@link #openElement(Element) openElement()} 076 * is called each time an element will be opened, providing the attributes 077 * only.</p> 078 * <p>Some convenience methods have been implemented that will give access 079 * to the parent element and to the path down to the current element.</p> 080 * 081 * <p><b>Note</b>: Unfortunately, this class do not work for XML streams 082 * that has elements embedded into text, as it is usual for HTML. The 083 * snippet</p> 084 * <pre><code><p>First Text <b>Bold Text</b> Second Text</p></code></pre> 085 * <p>will be parsed as "First Text Second Text" for the {@code p} 086 * element and "Bold Text" for the {@code b} element; the 087 * information that the b element was embedded in between is lost.</p> 088 * 089 * @extauthor Thomas Thrien - thomas.thrien@tquadrat.org 090 * @version $Id: AdvancedContentHandler.java 1101 2024-02-18 00:18:48Z tquadrat $ 091 * @since 0.0.5 092 * 093 * @UMLGraph.link 094 */ 095@SuppressWarnings( "AbstractClassWithoutAbstractMethods" ) 096@ClassVersion( sourceVersion = "$Id: AdvancedContentHandler.java 1101 2024-02-18 00:18:48Z tquadrat $" ) 097@API( status = STABLE, since = "0.0.5" ) 098public abstract class AdvancedContentHandler implements ContentHandler 099{ 100 /*---------------*\ 101 ====** Inner Classes **==================================================== 102 \*---------------*/ 103 /** 104 * This class serves a container for the name, the data and the attributes 105 * of an XML element. 106 * 107 * @extauthor Thomas Thrien - thomas.thrien@tquadrat.org 108 * @version $Id: AdvancedContentHandler.java 1101 2024-02-18 00:18:48Z tquadrat $ 109 * @since 0.0.5 110 * 111 * @UMLGraph.link 112 */ 113 @SuppressWarnings( {"InnerClassMayBeStatic", "ProtectedInnerClass"} ) 114 @ClassVersion( sourceVersion = "$Id: AdvancedContentHandler.java 1101 2024-02-18 00:18:48Z tquadrat $" ) 115 @API( status = STABLE, since = "0.1.0" ) 116 protected static final class Element 117 { 118 /*------------*\ 119 ====** Attributes **=================================================== 120 \*------------*/ 121 /** 122 * The attributes. 123 */ 124 private final Map<String,Attribute> m_Attributes; 125 126 /** 127 * The data. 128 */ 129 @SuppressWarnings( "StringBufferField" ) 130 private final StringBuilder m_Data; 131 132 /** 133 * The element's local name. 134 */ 135 private final String m_LocalName; 136 137 /** 138 * The parent for this element. 139 */ 140 @SuppressWarnings( "OptionalUsedAsFieldOrParameterType" ) 141 private final Optional<Element> m_Parent; 142 143 /** 144 * The path to the element. 145 */ 146 private final String m_Path; 147 148 /** 149 * The element's qualified name. 150 */ 151 private final String m_QName; 152 153 /** 154 * The namespace for this element; if {@code null}, the 155 * {@linkplain #m_QName qualified name} 156 * and the 157 * {@linkplain #m_LocalName local name} 158 * are the same. 159 */ 160 private final URI m_URI; 161 162 /*--------------*\ 163 ====** Constructors **================================================= 164 \*--------------*/ 165 /** 166 * Create a new object of this class from an element's name and its 167 * attributes. 168 * 169 * @param qName The element's qualified name. 170 * @param localName The element's local name. 171 * @param uri The namespace for the element; can be {@code null}. 172 * @param attributes The element's attributes. 173 * @param path The path to the element; this is a string, compiled 174 * from the element's name, separated by slashes ("/"). 175 * @param parent The parent element for this element; may be 176 * {@code null}. 177 */ 178 @SuppressWarnings( "ConstructorWithTooManyParameters" ) 179 Element( final String qName, final String localName, final URI uri, final Map<String,Attribute> attributes, final String path, @SuppressWarnings( "UseOfConcreteClass" ) final Element parent ) 180 { 181 m_QName = requireNotEmptyArgument( qName, "qName" ); 182 m_LocalName = requireNotEmptyArgument( localName, "localName" ); 183 m_URI = uri; 184 m_Attributes = Map.copyOf( attributes ); 185 m_Path = path; 186 m_Data = new StringBuilder(); 187 m_Parent = Optional.ofNullable( parent ); 188 } // Element 189 190 /*---------*\ 191 ====** Methods **====================================================== 192 \*---------*/ 193 /** 194 * Adds another data chunk to the data block for the current element. 195 * 196 * @param characters The characters. 197 * @param start The start position inside the characters array. 198 * @param end The ending position inside the array. 199 */ 200 public final void appendData( final char [] characters, final int start, final int end ) 201 { 202 m_Data.append( characters, start, end ); 203 } // appendData() 204 205 /** 206 * Returns the attributes of the element. 207 * 208 * @return The attributes. 209 */ 210 public final Map<String,Attribute> getAttributes() { return m_Attributes; } 211 212 /** 213 * Returns the data block for this element. 214 * 215 * @return The data block. 216 */ 217 public final String getData() { return m_Data.toString().trim(); } 218 219 /** 220 * Returns the local name of the element. 221 * 222 * @return The local name of the element. 223 */ 224 public final String getLocalName() { return m_LocalName; } 225 226 /** 227 * Returns the parent element. 228 * 229 * @return An instance of 230 * {@link Optional} 231 * that holds the parent element; will be 232 * {@linkplain Optional#empty() empty} 233 * if this element does not have a parent. 234 */ 235 public final Optional<Element> getParent() { return m_Parent; } 236 237 /** 238 * Returns to XML path to this element. 239 * 240 * @return The element's path. 241 */ 242 public final String getPath() { return m_Path; } 243 244 /** 245 * Returns the prefix from the element's qualified name. 246 * 247 * @return The prefix; if there is no prefix, the empty String will be 248 * returned. 249 */ 250 public final String getPrefix() 251 { 252 final var pos = m_QName.indexOf( ":" ); 253 final var retValue = pos > 0 ? m_QName.substring( 0, pos ) : EMPTY_STRING; 254 255 //---* Done *------------------------------------------------------ 256 return retValue; 257 } // getPrefix() 258 259 /** 260 * Returns the qualified name of the element. 261 * 262 * @return The qualified name of the element. 263 */ 264 public final String getQName() { return m_QName; } 265 266 /** 267 * Returns the namespace URI of the element. 268 * 269 * @return An instance of 270 * {@link Optional} 271 * that holds the namespace URI of the element. 272 */ 273 public final Optional<URI> getURI() { return Optional.ofNullable( m_URI ); } 274 } 275 // class Element 276 277 /** 278 * The functional interface describing a method that processes an XML 279 * element. 280 * 281 * @extauthor Thomas Thrien - thomas.thrien@tquadrat.org 282 * @version $Id: AdvancedContentHandler.java 1101 2024-02-18 00:18:48Z tquadrat $ 283 * @since 0.1.0 284 * 285 * @UMLGraph.link 286 */ 287 @SuppressWarnings( {"ProtectedInnerClass"} ) 288 @FunctionalInterface 289 @ClassVersion( sourceVersion = "$Id: AdvancedContentHandler.java 1101 2024-02-18 00:18:48Z tquadrat $" ) 290 @API( status = MAINTAINED, since = "0.1.0" ) 291 protected interface HandlerMethod 292 { 293 /*---------*\ 294 ====** Methods **====================================================== 295 \*---------*/ 296 /** 297 * <p>{@summary Processes an XML element.}</p> 298 * <p>As each element should have its own handler, the tag is not 299 * provided as an argument. If necessary, the tag can be derived from 300 * the {@code path} argument.</p> 301 * 302 * @param terminateElement {@code true} if called by 303 * {@link #processElement(Element)}, 304 * indicating that the element processing will be terminated, 305 * {@code false} when called by 306 * {@link #openElement(Element)}. 307 * @param data The element data; will be {@code null} if called 308 * by 309 * {@link #openElement(Element)}. 310 * @param attributes The element attributes. 311 * @param path The element path. 312 * @throws SAXException The element cannot be handled properly. 313 * 314 * @since 0.1.0 315 */ 316 public void process( final boolean terminateElement, final String data, final Map<String,Attribute> attributes, final String path ) throws SAXException; 317 } 318 // interface HandlerMethod 319 320 /*-----------*\ 321 ====** Constants **======================================================== 322 \*-----------*/ 323 /** 324 * An empty array of Element objects. 325 */ 326 private static final Element [] EMPTY_Element_ARRAY = new Element [0]; 327 328 /** 329 * The message indicating an invalid URI: {@value}. 330 */ 331 public static final String MSG_InvalidURI = "Invalid namespace URI: %s"; 332 333 /** 334 * The message indicating that there is no element handler for the given 335 * element: {@value}. 336 */ 337 public static final String MSG_NoHandler = "No handler for element '%1$s'"; 338 339 /** 340 * The message indicating that there is no element on the stack: {@value}. 341 */ 342 public static final String MSG_NoElementOnStack = "No element on stack"; 343 344 /*------------*\ 345 ====** Attributes **======================================================= 346 \*------------*/ 347 /** 348 * The document type. 349 */ 350 private String m_DocumentType = null; 351 352 /** 353 * This stack contains the open elements, stored as instances of 354 * {@link Element}. 355 */ 356 @SuppressWarnings( "UseOfConcreteClass" ) 357 private final Stack<Element> m_ElementStack = new Stack<>(); 358 359 /** 360 * The element handler methods. The key for this map is the qualified 361 * name of the element. 362 */ 363 private final Map<String,HandlerMethod> m_HandlerMethods = new TreeMap<>(); 364 365 /** 366 * The locator. 367 */ 368 private Locator m_Locator; 369 370 /** 371 * The name spaces. The prefix is the key to the map, while the URI is the 372 * value. 373 */ 374 private final Map<String,URI> m_Namespaces = new HashMap<>(); 375 376 /*--------------*\ 377 ====** Constructors **===================================================== 378 \*--------------*/ 379 /** 380 * The default constructor. 381 */ 382 protected AdvancedContentHandler() { /* Does nothing! */ } 383 384 /*---------*\ 385 ====** Methods **========================================================== 386 \*---------*/ 387 /** 388 * Receives notification of character data inside an element. 389 * 390 * @param ch The characters. 391 * @param start The start position inside the characters array. 392 * @param length The length of the subset to process. 393 * @throws SAXException Something has gone wrong. 394 */ 395 @Override 396 public final void characters( final char [] ch, final int start, final int length ) throws SAXException 397 { 398 final var element = m_ElementStack 399 .peek() 400 .orElseThrow( () -> new SAXParseException( MSG_NoElementOnStack, getLocator() ) ); 401 element.appendData( ch, start, length ); 402 } // characters() 403 404 /** 405 * Composes an 406 * {@link Attribute} 407 * instance from the data of the given 408 * {@link Attributes} 409 * instance at the given index. 410 * 411 * @param attributes The attributes. 412 * @param index The index. 413 * @return The attribute. 414 * @throws URISyntaxException The URI for the attribute's namespace 415 * cannot be parsed correctly. 416 * @throws IllegalArgumentException The attribute type is invalid. 417 */ 418 private static final Attribute composeAttribute( final Attributes attributes, final int index ) throws IllegalArgumentException, URISyntaxException 419 { 420 final var qName = attributes.getQName( index ); 421 final var attributesLocalName = attributes.getLocalName( index ); 422 final Optional<String> localName = isEmptyOrBlank( attributesLocalName ) ? Optional.empty() : Optional.of( attributesLocalName ); 423 final var attributesURI = attributes.getURI( index ); 424 final Optional<URI> uri = isEmptyOrBlank( attributesURI ) ? Optional.empty() : Optional.of( new URI( attributesURI ) ); 425 final var type = Attribute.Type.valueOf( attributes.getType( index ) ); 426 final var value = attributes.getValue( index ); 427 428 final var retValue = new Attribute( qName, localName, uri, type, value, index ); 429 430 //---* Done *---------------------------------------------------------- 431 return retValue; 432 } // composeAttribute() 433 434 /** 435 * Receives the notification about the end of the document.<br> 436 * <br>This implementation does nothing by default. Application writers 437 * may override this method in a subclass to take specific actions at the 438 * end of a document (such as finalising a tree or closing an output 439 * file). 440 * 441 * @throws SAXException Any SAX exception, possibly wrapping another 442 * exception. 443 */ 444 @SuppressWarnings( "NoopMethodInAbstractClass" ) 445 @Override 446 @MountPoint 447 public void endDocument() throws SAXException { /* Does nothing! */ } 448 449 /** 450 * {@summary Receives the notification about the end of an element.} This 451 * method will call the 452 * {@link #processElement(Element) processElement()} 453 * method and afterwards it will remove the element from the stack - in 454 * exactly that order, otherwise the 455 * {@link #getPath() getPath()} 456 * method would return wrong results. 457 * 458 * @param uri The URI for the namespace of this element; can be empty. 459 * @param localName The local name of the element. 460 * @param qName The element's qualified name. 461 * @throws SAXException The element was not correct according to the 462 * DTD. 463 */ 464 @Override 465 public final void endElement( final String uri, final String localName, final String qName ) throws SAXException 466 { 467 final var element = m_ElementStack.peek().orElseThrow( () -> new SAXParseException( "No element '%1$s' on Stack".formatted( qName ), getLocator() ) ); 468 if( !element.getQName().equals( qName ) ) 469 { 470 throw new SAXParseException( "Closing element '%1$s' does not match open element '%2$s'".formatted( qName, element.getQName() ), getLocator() ); 471 } 472 processElement( element ); 473 474 //---* Remove element from stack *------------------------------------- 475 m_ElementStack.pop(); 476 } // endElement() 477 478 /** 479 * Receives the notification of the end for a name space mapping. 480 * 481 * @param prefix The Namespace prefix being declared. 482 * @throws SAXException Any SAX exception, possibly wrapping another 483 * exception. 484 */ 485 @Override 486 public final void endPrefixMapping( final String prefix ) throws SAXException 487 { 488 //---* Delete the namespace *------------------------------------------ 489 m_Namespaces.remove( requireNonNullArgument( prefix, "prefix" ) ); 490 } // endPrefixMapping() 491 492 /** 493 * Returns the name of the document type. 494 * 495 * @return The document type. 496 */ 497 public final String getDocumentType() { return m_DocumentType; } 498 499 /** 500 * Returns a copy of the locator. 501 * 502 * @return A copy of the locator object or {@code null} if there was 503 * none provided by the parser. 504 */ 505 protected final Locator getLocator() { return nonNull( m_Locator ) ? new LocatorImpl( m_Locator ) : null; } 506 507 /** 508 * Returns the path for the element as an array, with the qualified 509 * element names as the entries in the array. The array is ordered in the 510 * way that the current element is at position {@code [0]}, while the root 511 * element (the document element) is at {@code [length - 1]}. 512 * 513 * @return The list of element names that build the path to the current 514 * element. 515 */ 516 protected final String [] getPath() 517 { 518 final var elements = m_ElementStack.toArray( EMPTY_Element_ARRAY ); 519 final var retValue = IntStream.range( 0, m_ElementStack.size() ) 520 .mapToObj( i -> elements [i].getQName() ) 521 .toArray( String[]::new ); 522 523 //---* Done *---------------------------------------------------------- 524 return retValue; 525 } // getPath() 526 527 /** 528 * Returns the path depth for the element. 529 * 530 * @return The number of nodes on the path to the current element. 0 means 531 * that the current element is the document. 532 */ 533 protected final int getPathDepth() { return m_ElementStack.size() - 1; } 534 535 /** 536 * The default element handling; it does nothing. 537 * 538 * @param element The element. 539 * @param terminateElement {@code true} if called by 540 * {@link #processElement(Element)}, 541 * indicating that the element processing will be terminated, 542 * {@code false} when called by 543 * {@link #openElement(Element)}. 544 * @throws SAXException The element cannot be handled properly. 545 * 546 * @since 0.1.0 547 */ 548 @SuppressWarnings( {"unused", "NoopMethodInAbstractClass"} ) 549 @MountPoint 550 @API( status = MAINTAINED, since = "0.1.0" ) 551 protected void handleElement( @SuppressWarnings( "UseOfConcreteClass" ) final Element element, final boolean terminateElement ) throws SAXException 552 { 553 //---* Does nothing *-------------------------------------------------- 554 } // handleElement() 555 556 /** 557 * Receives the notification of ignorable whitespace in element 558 * content.<br> 559 * <br>This implementation does nothing by default. Application writers 560 * may override this method to take specific actions for each chunk of 561 * ignorable whitespace (such as adding data to a node or buffer, or 562 * printing it to a file). 563 * 564 * @param ch The whitespace characters. 565 * @param start The start position in the character array. 566 * @param length The number of characters to use from the character 567 * array. 568 * @throws SAXException Any SAX exception, possibly wrapping another 569 * exception. 570 */ 571 @SuppressWarnings( "NoopMethodInAbstractClass" ) 572 @Override 573 @MountPoint 574 public void ignorableWhitespace( final char [] ch, final int start, final int length ) throws SAXException { /* Does nothing! */ } 575 576 /** 577 * This method is called every time a new element was encountered by the 578 * parser. It should be overwritten if it is necessary to perform any 579 * activities for a specific element.<br> 580 * <br>The default implementation looks up a method handler in the map of 581 * element handlers and calls that, or throws an exception if no handler 582 * was registered for that element. 583 * 584 * @param element The element. 585 * @throws SAXException Something has gone wrong. 586 * 587 * @since 0.1.0 588 */ 589 @MountPoint 590 @API( status = MAINTAINED, since = "0.1.0" ) 591 protected void openElement( @SuppressWarnings( "UseOfConcreteClass" ) final Element element ) throws SAXException 592 { 593 final var method = m_HandlerMethods.get( element.getQName() ); 594 if( isNull(method ) ) throw new SAXException( format( MSG_NoHandler, element ) ); 595 596 //---* Process the element *------------------------------------------- 597 method.process( false, null, element.getAttributes(), element.getPath() ); 598 } // openElement() 599 600 /** 601 * Processing of an element of the XML file. This method will be called 602 * by 603 * {@link #endElement(String,String,String) endElement()} 604 * any time an element was closed.<br> 605 * <br>The default implementation looks up a method handler in the map of 606 * element handlers and calls that, or throws an exception if no handler 607 * was registered for that element. 608 * 609 * @param element The element. 610 * @throws SAXException Something has gone wrong. 611 * 612 * @since 0.1.0 613 */ 614 @MountPoint 615 @API( status = MAINTAINED, since = "0.1.0" ) 616 protected void processElement( @SuppressWarnings( "UseOfConcreteClass" ) final Element element ) throws SAXException 617 { 618 final var method = m_HandlerMethods.get( element.getQName() ); 619 if( isNull(method ) ) throw new SAXException( format( MSG_NoHandler, element.getQName() ) ); 620 621 //---* Process the element *------------------------------------------- 622 method.process( true, element.getData(), element.getAttributes(), element.getPath() ); 623 } // processElement() 624 625 /** 626 * Receives notification of a processing instruction.<br> 627 * <br>This implementation does nothing by default. Application writers 628 * may override this method in a subclass to take specific actions for 629 * each processing instruction, such as setting status variables or 630 * invoking other methods. 631 * 632 * @param target The processing instruction target. 633 * @param data The processing instruction data, or {@code null} 634 * if none is supplied. 635 * @throws SAXException Any SAX exception, possibly wrapping another 636 * exception. 637 */ 638 @SuppressWarnings( "NoopMethodInAbstractClass" ) 639 @Override 640 @MountPoint 641 public void processingInstruction( final String target, final String data ) throws SAXException { /* Does nothing! */ } 642 643 /** 644 * Adds an element handler to the map of handler methods. 645 * 646 * @param qName The qualified name of the elements that should be 647 * processed by the handler . 648 * @param method The method reference for the handler. 649 */ 650 protected final void registerElementHandler( final String qName, final HandlerMethod method ) 651 { 652 m_HandlerMethods.put( requireNotEmptyArgument( qName, "qName" ), requireNonNullArgument( method, "method" ) ); 653 } // addElementHandler() 654 655 /** 656 * Returns the current column number in the XML file. A negative value 657 * indicates that the column is unknown. 658 * 659 * @return The current column number. 660 */ 661 protected final int retrieveCurrentColumn() { return nonNull( m_Locator ) ? m_Locator.getColumnNumber() : -1; } 662 663 /** 664 * Returns the current line number in the XML file. A negative value 665 * indicates that the line is unknown. 666 * 667 * @return The current line number. 668 */ 669 protected final int retrieveCurrentLine() { return nonNull( m_Locator ) ? m_Locator.getLineNumber() : -1; } 670 671 /** 672 * Returns the namespace for the current element (that one that is on top 673 * of the element stack). 674 * 675 * @return An instance of 676 * {@link Optional} 677 * that holds the namespace for the current element. Will be 678 * {@linkplain Optional#empty() empty} 679 * if there is no namespace for the current element. 680 * @throws SAXException An error occurred while retrieving the 681 * namespace information. 682 * 683 * @since 0.1.0 684 */ 685 @API( status = MAINTAINED, since = "0.1.0" ) 686 protected final Optional<URI> retrieveCurrentNamespace() throws SAXException 687 { 688 final var element = m_ElementStack.peek().orElseThrow( () -> new SAXParseException( MSG_NoElementOnStack, getLocator() ) ); 689 final var retValue = element.getURI(); 690 691 //---* Done *---------------------------------------------------------- 692 return retValue; 693 } // retrieveCurrentNamespace() 694 695 /** 696 * Returns the URI of the namespace for the given prefix. 697 * 698 * @param prefix The prefix. 699 * @return An instance of 700 * {@link Optional} 701 * that holds the namespace for the prefix. Will be 702 * {@linkplain Optional#empty() empty} 703 * if there is no namespace for the given prefix. 704 * 705 * @since 0.1.0 706 */ 707 @API( status = MAINTAINED, since = "0.1.0" ) 708 protected final Optional<URI> retrieveNamespace( final String prefix ) 709 { 710 final var retValue = Optional.ofNullable( m_Namespaces.get( requireNotEmptyArgument( prefix, "prefix" ) ) ); 711 712 //---* Done *---------------------------------------------------------- 713 return retValue; 714 } // retrieveNamespace() 715 716 /** 717 * Returns the registered prefix for the given namespace. If more than one 718 * prefix is registered for the same namespace, only that one that is 719 * alphabetically the first one will be returned. 720 * 721 * @param namespace The URI for the namespace. 722 * @return An instance of 723 * {@link Optional} 724 * that holds the registered prefix. 725 * @since 0.1.0 726 */ 727 @API( status = MAINTAINED, since = "0.1.0" ) 728 protected final Optional<String> retrievePrefix( final URI namespace ) 729 { 730 requireNonNullArgument( namespace, "namespace" ); 731 final var retValue = m_Namespaces 732 .entrySet() 733 .stream() 734 .filter( entry -> entry.getValue().equals( namespace ) ) 735 .map( Map.Entry::getKey ) 736 .findFirst(); 737 738 //---* Done *---------------------------------------------------------- 739 return retValue; 740 } // retrievePrefix() 741 742 /** 743 * Receives an object for locating the origin of SAX document 744 * events.<br> 745 * <br>SAX parsers are strongly encouraged (though not absolutely 746 * required) to supply a locator: if it does so, it must supply the 747 * locator to the application by invoking this method before invoking any 748 * of the other methods in the ContentHandler interface.<br> 749 * <br>The locator allows the application to determine the end position 750 * of any document-related event, even if the parser is not reporting an 751 * error. Typically, the application will use this information for 752 * reporting its own errors (such as character content that does not match 753 * an application's business rules). The information returned by the 754 * locator is probably not sufficient for use with a search engine.<br> 755 * <br>Note that the locator will return correct information only during 756 * the invocation SAX event callbacks after startDocument returns and 757 * before endDocument is called. The application should not attempt to use 758 * it at any other time. 759 * 760 * @param locator An object that can return the location of any SAX 761 * document event. 762 */ 763 @Override 764 public final void setDocumentLocator( final Locator locator ) { m_Locator = requireNonNullArgument( locator, "locator" ); } 765 766 /** 767 * <p>{@summary Receives notification of a skipped entity.}</p> 768 * <p>This implementation does nothing by default. Application writers 769 * may override this method in a subclass to take specific actions for 770 * each processing instruction, such as setting status variables or 771 * invoking other methods.</p> 772 * 773 * @param name The name of the skipped entity. 774 * @throws SAXException Any SAX exception, possibly wrapping another 775 * exception. 776 */ 777 @SuppressWarnings( "NoopMethodInAbstractClass" ) 778 @Override 779 @MountPoint 780 public void skippedEntity( final String name ) throws SAXException { /* Does nothing! */ } 781 782 /** 783 * Receives the notification about the start of an element. 784 * 785 * @param uri The URI for the namespace of this element; can be empty. 786 * @param localName The local name of the element. 787 * @param qName The element's qualified name. 788 * @param attributes The element's attributes. 789 * @throws SAXException The element was not correct according to the 790 * DTD. 791 */ 792 @SuppressWarnings( "OverlyComplexMethod" ) 793 @Override 794 public final void startElement( final String uri, final String localName, final String qName, final Attributes attributes ) throws SAXException 795 { 796 if( isNull( localName ) && isNull( qName ) ) 797 { 798 throw new SAXParseException( "No name for element", getLocator() ); 799 } 800 801 //---* Store the document type *--------------------------------------- 802 if( isNull( m_DocumentType ) ) m_DocumentType = qName; 803 804 //---* Build the path *------------------------------------------------ 805 Element parent = null; 806 final var path = new StringBuilder(); 807 if( !m_ElementStack.isEmpty() ) 808 { 809 parent = m_ElementStack.peek().orElseThrow( () -> new SAXParseException( MSG_NoElementOnStack, getLocator() ) ); 810 path.append( parent.getPath() ); 811 } 812 path.append( '/' ) 813 .append( qName.trim() ); 814 815 //---* Build the attributes map *-------------------------------------- 816 final Map<String,Attribute> attributesMap = new HashMap<>(); 817 try 818 { 819 for( var i = 0; i < attributes.getLength(); ++i ) 820 { 821 attributesMap.put( attributes.getQName( i ), composeAttribute( attributes, i ) ); 822 } 823 } 824 catch( final IllegalArgumentException | URISyntaxException e ) 825 { 826 throw new SAXParseException( "Invalid Argument data", getLocator(), e ); 827 } 828 829 //---* Build the element *--------------------------------------------- 830 var effectiveQName = qName; 831 var effectiveLocalName = localName; 832 URI namespace = null; 833 if( isNotEmptyOrBlank( uri ) ) 834 { 835 try 836 { 837 namespace = new URI( uri ); 838 } 839 catch( final URISyntaxException e ) 840 { 841 throw new SAXParseException( format( MSG_InvalidURI, uri), getLocator(), e ); 842 } 843 844 if( isNull( effectiveLocalName ) ) 845 { 846 final var pos = effectiveQName.indexOf( ':' ); 847 effectiveLocalName = pos == -1 ? effectiveQName : effectiveQName.substring( pos + 1 ); 848 } 849 if( isNull( effectiveQName ) ) 850 { 851 final var prefix = retrievePrefix( namespace ).orElseThrow( () -> new SAXParseException( "Unknown Namespace: %s".formatted( uri ), getLocator() ) ); 852 effectiveQName = format( "%s:%s", prefix, effectiveLocalName ); 853 } 854 } 855 else 856 { 857 if( isNull( effectiveLocalName ) ) effectiveLocalName = effectiveQName; 858 if( isNull( effectiveQName ) ) effectiveQName = effectiveLocalName; 859 } 860 861 final var element = new Element( effectiveQName, effectiveLocalName, namespace, attributesMap, path.toString(), parent ); 862 m_ElementStack.push( element ); 863 openElement( element ); 864 } // startElement() 865 866 /** 867 * Receives the notification of the beginning of the document.<br> 868 * <br>This implementation does nothing by default. Application writers 869 * may override this method in a subclass to take specific actions at the 870 * beginning of a document (such as allocating the root node of a tree or 871 * creating an output file). 872 * 873 * @throws SAXException Any SAX exception, possibly wrapping another 874 * exception. 875 */ 876 @SuppressWarnings( "NoopMethodInAbstractClass" ) 877 @Override 878 @MountPoint 879 public void startDocument() throws SAXException { /* Does nothing! */ } 880 881 /** 882 * Receives the notification of the start of a Namespace mapping. 883 * 884 * @param prefix The Namespace prefix being declared. 885 * @param uri The Namespace URI mapped to the prefix. 886 * @throws SAXException Any SAX exception, possibly wrapping another 887 * exception. 888 */ 889 @Override 890 public final void startPrefixMapping( final String prefix, final String uri ) throws SAXException 891 { 892 final URI namespace; 893 try 894 { 895 namespace = new URI( requireNonNullArgument( uri, "uri" ) ); 896 } 897 catch( final URISyntaxException e ) 898 { 899 throw new SAXParseException( format( MSG_InvalidURI, uri), getLocator(), e ); 900 } 901 902 //---* Store the mapping *--------------------------------------------- 903 m_Namespaces.put( requireNonNullArgument( prefix, "prefix" ), namespace ); 904 } // startPrefixMapping() 905} 906// class AdvancedContentHandler 907 908/* 909 * End of File 910 */