001/*
002 * ============================================================================
003 * Copyright © 2002-2021 by Thomas Thrien.
004 * All Rights Reserved.
005 * ============================================================================
006 * Licensed to the public under the agreements of the GNU Lesser General Public
007 * License, version 3.0 (the "License"). You may obtain a copy of the License at
008 *
009 *      http://www.gnu.org/licenses/lgpl.html
010 *
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
013 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
014 * License for the specific language governing permissions and limitations
015 * under the License.
016 */
017
018package org.tquadrat.foundation.xml.parse;
019
020import static org.apiguardian.api.API.Status.STABLE;
021import static org.tquadrat.foundation.lang.CommonConstants.UTF8;
022import static org.tquadrat.foundation.lang.Objects.nonNull;
023import static org.tquadrat.foundation.lang.Objects.requireNonNullArgument;
024
025import java.io.BufferedReader;
026import java.io.IOException;
027import java.io.InputStreamReader;
028import java.net.URL;
029import java.util.HashMap;
030import java.util.Map;
031
032import org.apiguardian.api.API;
033import org.tquadrat.foundation.annotation.ClassVersion;
034import org.xml.sax.Attributes;
035import org.xml.sax.ContentHandler;
036import org.xml.sax.DTDHandler;
037import org.xml.sax.EntityResolver;
038import org.xml.sax.ErrorHandler;
039import org.xml.sax.InputSource;
040import org.xml.sax.Locator;
041import org.xml.sax.SAXException;
042import org.xml.sax.SAXNotRecognizedException;
043import org.xml.sax.SAXNotSupportedException;
044import org.xml.sax.XMLReader;
045import org.xml.sax.helpers.AttributesImpl;
046import org.xml.sax.helpers.LocatorImpl;
047
048/**
049 *  This class is an abstract base implementation for a XMLReader. Use this
050 *  class as a base class for SAX based parsers that will parse other formats
051 *  than XML. This is quite useful in combination with XSLT. Refer to the
052 *  description for the abstract method
053 *  {@link #process(BufferedReader) process()}
054 *  for a brief run-through how to use this class.
055 *
056 *  @see "'Java and XSLT' from Eric M. Burke, O'Reilly 2001"
057 *
058 *  @extauthor Thomas Thrien - thomas.thrien@tquadrat.org
059 *  @version $Id: AbstractXMLReader.java 1030 2022-04-06 13:42:02Z tquadrat $
060 *  @since 0.0.5
061 *
062 *  @UMLGraph.link
063 */
064@ClassVersion( sourceVersion = "$Id: AbstractXMLReader.java 1030 2022-04-06 13:42:02Z tquadrat $" )
065@API( status = STABLE, since = "0.0.5" )
066public abstract class AbstractXMLReader implements XMLReader
067{
068        /*-----------*\
069    ====** Constants **========================================================
070        \*-----------*/
071    /**
072     *  An empty attribute set.
073     */
074    public static final Attributes NO_ATTRIBUTES = new AttributesImpl();
075
076        /*------------*\
077    ====** Attributes **=======================================================
078        \*------------*/
079    /**
080     *  The content handler that provides the data to parse.
081     */
082    private ContentHandler m_ContentHandler;
083
084    /**
085     *  The DTD handler used by this XMLReader; usually {@code null}.
086     */
087    private DTDHandler m_DTDHandler;
088
089    /**
090     *  The encoding that is used for the input source. If {@code null},
091     *  the platform specific encoding will be used.
092     */
093    private String m_Encoding = null;
094
095    /**
096     *  The entity resolver that is used by this XMLReader. It is especially
097     *  used by
098     *  {@link #parse(InputSource) parse( InputSource )}.
099     */
100    private EntityResolver m_EntityResolver;
101
102    /**
103     *  The error handler that is used by this XMlReader.
104     */
105    private ErrorHandler m_ErrorHandler;
106
107    /**
108     *  The features that are supported by this XMLReader.
109     */
110    private final Map<String,Boolean> m_Features = new HashMap<>();
111
112    /**
113     *  The locator that is maintained by this XML reader.
114     */
115    private final LocatorImpl m_Locator = new LocatorImpl();
116
117    /**
118     *  The properties that are set for this XMLReader.
119     */
120    private final Map<String,Object> m_Properties = new HashMap<>();
121
122        /*--------------*\
123    ====** Constructors **=====================================================
124        \*--------------*/
125    /**
126     *  The default constructor.
127     */
128    protected AbstractXMLReader()
129    {
130        setErrorHandler( null );
131    }   //  AbstractXMLReader()
132
133    /**
134     *  Creates a new instance of AbstractXMLReader and assigns the given
135     *  content handler.
136     *
137     *  @param  contentHandler  The content handler to use with this XMLReader.
138     */
139    protected AbstractXMLReader( final ContentHandler contentHandler )
140    {
141        this();
142
143        setContentHandler( requireNonNullArgument( contentHandler, "contentHandler" ) );
144    }   //  AbstractXMLReader()
145
146        /*---------*\
147    ====** Methods **==========================================================
148        \*---------*/
149    /**
150     *  Creates a buffered reader from the given input source.
151     *
152     *  @param  input   The input source.
153     *  @return The buffered reader for the input data.
154     *  @throws IOException Unable to create a reader from the input source.
155     *  @throws SAXException    Unable to interpret the data provided with the
156     *      input source.
157     */
158    private BufferedReader createReader( final InputSource input ) throws IOException, SAXException
159    {
160        //---* Create a buffered reader from the input source *----------------
161        BufferedReader retValue = null;
162        //noinspection IfStatementWithTooManyBranches
163        if( nonNull( input.getCharacterStream() ) )
164        {
165            retValue = new BufferedReader( input.getCharacterStream() );
166        }
167        else if( nonNull( input.getByteStream() ) )
168        {
169            retValue = new BufferedReader( nonNull( m_Encoding ) ? new InputStreamReader( input.getByteStream(), m_Encoding ) : new InputStreamReader( input.getByteStream(), UTF8 ) );
170        }
171        else if( nonNull( input.getSystemId() ) )
172        {
173            final var entityResolver = getEntityResolver();
174            if( nonNull( entityResolver ) )
175            {
176                //---* Use the entity resolver to get the reader *-------------
177                retValue = createReader( entityResolver.resolveEntity( input.getPublicId(), input.getSystemId() ) );
178            }
179            else
180            {
181                /*
182                 * If no entity resolver is set, the system id that is stored
183                 * in an InputSource will be taken directly as a fully
184                 * qualified URL to a stream somewhere. Otherwise, it will be
185                 * translated using that entity resolver.
186                 */
187                final var url = new URL( input.getSystemId() );
188                retValue = new BufferedReader( nonNull( m_Encoding ) ? new InputStreamReader( url.openStream(), m_Encoding ) : new InputStreamReader( url.openStream(), UTF8 ) );
189            }
190        }
191        else
192        {
193            //---* Something is weird with this input source *-----------------
194            throw new SAXException( "Invalid Input Source" );
195        }
196
197        //---* Done *----------------------------------------------------------
198        return retValue;
199    }   //  createReader()
200
201    /**
202     *  Returns the current content handler.
203     *
204     *  @return A reference to the current content handler.
205     */
206    @Override
207    public final ContentHandler getContentHandler() { return m_ContentHandler; }
208
209    /**
210     *  Returns the current DTD handler.
211     *
212     *  @return A reference to the current DTD handler.
213     */
214    @Override
215    public final DTDHandler getDTDHandler() { return m_DTDHandler; }
216
217    /**
218     *  Returns the current entity resolver.
219     *
220     *  @return A reference to the current entity resolver.
221     */
222    @Override
223    public final EntityResolver getEntityResolver() { return m_EntityResolver; }
224
225    /**
226     *  Returns the current error handler. It will never return
227     *  {@code null}; in case no handler was set, a reference to an
228     *  instance of
229     *  {@link DefaultErrorHandler DefaultErrorHandler}
230     *  will be returned.
231     *
232     *  @return A reference to the current error handler.
233     */
234    @Override
235    public final ErrorHandler getErrorHandler() { return m_ErrorHandler; }
236
237    /**
238     *  {@inheritDoc}
239     *  As this is not meant as a base for an <i>XML</i> parser, this
240     *  implementation does not recognise the required namespaces. If this is
241     *  needed, the derived class has to provide another implementation  for
242     *  {@code getFeature()}
243     *  and
244     *  {@link #setFeature(String, boolean) setFeature()}.
245     *
246     *  @param  name    The name of the feature.
247     *  @return {@code true} if the feature is supported,
248     *      {@code false} if not or if the name is unknown.
249     *  @throws SAXNotRecognizedException   The feature value cannot be
250     *      retrieved.
251     *  @throws SAXNotSupportedException    The XMLReader recognizes the
252     *      feature name but cannot determine its value at this time.
253     */
254    @Override
255    public boolean getFeature( final String name ) throws SAXNotRecognizedException, SAXNotSupportedException
256    {
257        final var feature = m_Features.get( requireNonNullArgument( name, "name" ) );
258        final var retValue = nonNull( feature ) && feature.booleanValue();
259
260        //---* Done *----------------------------------------------------------
261        return retValue;
262    }   //  getFeature()
263
264    /**
265     *  Returns the value for the property with the given name. Usually, this
266     *  property name is any fully-qualified URI. It is possible for an
267     *  XMLReader to recognise a property name but temporarily be unable to
268     *  return its value. Some property values may be available only in
269     *  specific contexts, such as before, during, or after a parse.<br>
270     *  <br>XMLReaders are not required to recognise any specific property
271     *  names, though an initial core set is documented for SAX2. But even that
272     *  is not supported by this specific implementation; if this is a
273     *  requirement, a derived class has to provide its own implementation of
274     *  {@code getProperty()}
275     *  and
276     *  {@link #setProperty(String, Object) setProperty()}.
277     *
278     *  @param  name    The property name, which is a fully-qualified URI.
279     *  @return The current value of the property. If the name is not known,
280     *      {@code null} will be returned instead of throwing an
281     *      exception.
282     *
283     *  @throws SAXNotRecognizedException   The property value can't be
284     *      retrieved.
285     *  @throws SAXNotSupportedException    The XMLReader recognizes the
286     *      property name but cannot determine its value at this time.
287     *
288     *  @see #setProperty(String, Object) setProperty()
289     */
290    @Override
291    public Object getProperty( final String name ) throws SAXNotRecognizedException, SAXNotSupportedException
292    {
293        return m_Properties.get( requireNonNullArgument( name, "name" ) );
294    }   //  getProperty()
295
296    /**
297     *  Returns a reference to the locator object provided by this base
298     *  implementation.
299     *
300     *  @return The reference to the locator object.
301     */
302    protected final Locator getLocator() { return m_Locator; }
303
304    /**
305     *  Parses an input data source.<br>
306     *  <br>The application can use this method to instruct the XML reader to
307     *  begin parsing a document from any valid input source (a character
308     *  stream, a byte stream, or a URI).<br>
309     *  <br>Applications may not invoke this method while a parse is in
310     *  progress (they should create a new XMLReader instead for each nested
311     *  document). Once a parse is complete, an application may reuse the same
312     *  XMLReader object, possibly with a different input source. Configuration
313     *  of the XMLReader object (such as handler bindings and values
314     *  established for feature flags and properties) is unchanged by
315     *  completion of a parse, unless the definition of that aspect of the
316     *  configuration explicitly specifies other behavior (For example,
317     *  feature flags or properties exposing characteristics of the document
318     *  being parsed).<br>
319     *  <br>During the parse, the XMLReader will provide information about the
320     *  document through the registered event handlers.<br>
321     *  <br>This method is synchronous: it will not return until parsing has
322     *  ended. If a client application wants to terminate parsing early, it
323     *  should throw an exception.<br>
324     *  <br>This implementation calls
325     *  {@link #process(BufferedReader) process()}
326     *  which is the user provided implementation for the parser.<br>
327     *  <br>If no content handler is set, this method returns immediately
328     *  without any error message.
329     *
330     *  @param  input   The input source for the top-level of the document.
331     *  @throws IOException An IO exception from the parser, possibly from a
332     *      byte stream or character stream supplied by the application.
333     *  @throws SAXException    Any SAX exception, possibly wrapping another
334     *      exception.
335     *
336     *  @see org.xml.sax.InputSource
337     *  @see #parse(java.lang.String) parse( String )
338     *  @see #setEntityResolver(EntityResolver) setEntityResolver()
339     *  @see #setDTDHandler(DTDHandler) setDTDHandler()
340     *  @see #setContentHandler(ContentHandler) setContentHandler()
341     *  @see #setErrorHandler(ErrorHandler) setErrorHandler()
342     */
343    @Override
344    public final void parse( final InputSource input ) throws IOException, SAXException
345    {
346        final var contentHandler = getContentHandler();
347        if( nonNull( contentHandler ) )
348        {
349            //---* Sets the locator *------------------------------------------
350            /*
351             * m_Locator will never be null.
352             */
353            contentHandler.setDocumentLocator( m_Locator );
354
355            //---* Obtain a reader from the input source *---------------------
356            try( final var reader = createReader( requireNonNullArgument( input, "input" ) ) )
357            {
358                //---* Let someone else do the work *--------------------------
359                process( reader );
360            }
361        }
362    }   //  parse()
363
364    /**
365     *  Parses an XML document from a system identifier (URI).<br>
366     *  <br>This method is a shortcut for the common case of reading a
367     *  document from a system identifier. It is the exact equivalent of the
368     *  following:<br>
369     *  {@code parse( new InputSource( systemId ) );}<br>
370     *  <br>If the system identifier is a URL, it must be fully resolved by
371     *  the application before it is passed to the parser.
372     *
373     *  @param  systemId    The system identifier (URI).
374     *  @throws IOException An IO exception from the parser, possibly from a
375     *      byte stream or character stream supplied by the application.
376     *  @throws SAXException    Any SAX exception, possibly wrapping another
377     *      exception.
378     *
379     *  @see #parse(org.xml.sax.InputSource) parse( InputSource )
380     */
381    @Override
382    public final void parse( final String systemId ) throws IOException, SAXException { parse( new InputSource( systemId ) ); }
383
384    /**
385     *  This method has to be implemented in order to perform the parsing. It
386     *  will be called either from
387     *  {@link #parse(String) parse( String )}
388     *  or
389     *  {@link #parse(InputSource) parse( InputSource )}.<br>
390     *  <br>The implementation of this method should update the locator by
391     *  appropriate calls to
392     *  {@link #setPublicId(String) setPublicId()},
393     *  {@link #setSystemId(String) setSystemId()},
394     *  and
395     *  {@link #setLocation(int, int) setLocation()} -
396     *  if possible ...<br>
397     *  <br>The input is not meant to be XML, so it is difficult to describe
398     *  here how to parse the input. But assuming that the input stream is a
399     *  Java properties file, the implementation for {@code process()} might
400     *  look like this:
401     *  <pre><code>
402     *  protected void process( BufferedReader input ) throws IOException, SAXException
403     *  {
404     *      ContentHandler handler = getContentHandler();
405     *
406     *      //---* Load the properties *---------------------------------------
407     *      ExtendedProperties properties = new ExtendedProperties();
408     *      properties.load( input );
409     *
410     *      //---* Create the document *---------------------------------------
411     *      handler.startDocument();
412     *      handler.startElement( null, null, "properties", new AttributesImpl() );
413     *
414     *      //---* Process the properties *------------------------------------
415     *      AttributesImpl attributes;
416     *      char [] value;
417     *      // Each property will be treated as a value with the key as its
418     *      // attribute.
419     *      for( String name : properties.stringPropertyNames() )
420     *      {
421     *          //---* Start the element *-------------------------------------
422     *          attributes = new AttributesImpl();
423     *          attributes.addAttribute( null, null, "name", "ID", name );
424     *          handler.startElement( null, null, "property", attributes );
425     *
426     *          //---* The element contents *----------------------------------
427     *          value = properties.getProperty( name ).toCharArray();
428     *          handler.characters( value, 0, value.length );
429     *
430     *          //---* End the element *---------------------------------------
431     *          handler.endElement( null, null, "property" );
432     *      }
433     *
434     *      //---* Finish the document *---------------------------------------
435     *      handler.endElement( null, null, "properties" );
436     *      handler.endDocument();
437     *  }   //  process()
438     *  </code></pre>
439     *
440     *  @param  input   The input stream.
441     *  @throws IOException Problems reading the input stream.
442     *  @throws SAXException    Something has gone wrong.
443     */
444    protected abstract void process( BufferedReader input ) throws IOException, SAXException;
445
446    /**
447     *  Sets the content handler used by this XMLReader. It allows an
448     *  application to register a content event handler.<br>
449     *  <br>If the application does not register a content handler, all
450     *  content events reported by the SAX parser will be silently
451     *  ignored.<br>
452     *  <br>Applications may register a new or different handler in the middle
453     *  of a parse, and the SAX parser must begin using the new handler
454     *  immediately.
455     *
456     *  @param  handler  The content handler; may be {@code null}.
457     */
458    @Override
459    public final void setContentHandler( final ContentHandler handler )  { m_ContentHandler = handler; }
460
461    /**
462     *  Sets the DTD handler used by this XMLReader. Allows an application to
463     *  register a DTD event handler.<br>
464     *  <br>If the application does not register a DTD handler, all DTD events
465     *  reported by the SAX parser will be silently ignored.<br>
466     *  <br>Applications may register a new or different handler in the middle
467     *  of a parse, and the SAX parser must begin using the new handler
468     *  immediately.
469     *
470     *  @param  handler The DTD handler; may be {@code null}.
471     */
472    @Override
473    public final void setDTDHandler( final DTDHandler handler ) { m_DTDHandler = handler; }
474
475    /**
476     *  Sets the encoding for the input source. {@code null} means that
477     *  the platform specific encoding is used.
478     *
479     *  @param  encoding    The encoding to use; may be {@code null}.
480     */
481    public final void setEncoding( final String encoding ) { m_Encoding = encoding; }
482
483    /**
484     *  Sets the entity resolver that is used by this XMLReader. Allows an
485     *  application to register an entity resolver.<br>
486     *  <br>If the application does not register an entity resolver, the
487     *  XMLReader will perform its own default resolution.<br>
488     *  <br>Applications may register a new or different resolver in the
489     *  middle of a parse, and the SAX parser must begin using the new resolver
490     *  immediately.
491     *
492     *  @param  resolver    The entity resolver; may be {@code null}.
493     */
494    @Override
495    public final void setEntityResolver( final EntityResolver resolver ) { m_EntityResolver = resolver; }
496
497    /**
498     *  Sets the error handler that is used by this XMLReader. Allows an
499     *  application to register an error event handler.<br>
500     *  <br>If the application does not register an error handler, all error
501     *  events reported by the SAX parser will be written to
502     *  {@link System#err System.err}
503     *  and otherwise silently ignored; however, normal processing may not
504     *  continue. It is highly recommended that all SAX applications implement
505     *  an error handler to avoid unexpected bugs.<br>
506     *  <br>Applications may register a new or different handler in the middle
507     *  of a parse, and the SAX parser must begin using the new handler
508     *  immediately.
509     *
510     *  @param  handler The error handler; may be {@code null}.
511     *
512     *  @see DefaultErrorHandler
513     */
514    @Override
515    public final void setErrorHandler( final ErrorHandler handler )
516    {
517        m_ErrorHandler = nonNull( handler ) ? handler : DefaultErrorHandler.INSTANCE;
518    }   //  setErrorHandler()
519
520    /**
521     *  Sets the feature flag.
522     *
523     *  @param  name    The name of the feature.
524     *  @param  value   {@code true} if the feature should be supported
525     *      by this implementation, {@code false} if not.
526     *  @throws SAXNotRecognizedException   The feature value cannot be
527     *      assigned.
528     *  @throws SAXNotSupportedException    The XMLReader recognises the
529     *      feature name but cannot set the requested value.
530     *
531     *  @see #getFeature(String) getFeature()
532     */
533    @Override
534    public void setFeature( final String name, final boolean value ) throws SAXNotRecognizedException, SAXNotSupportedException
535    {
536        m_Features.put( requireNonNullArgument( name, "name" ), Boolean.valueOf( value ) );
537    }   //  setFeature()
538
539    /**
540     *  Sets the current location to the locator.
541     *
542     *  @param  lineNumber   The current line number.
543     *  @param  columnNumber    The current column number.
544     */
545    protected final void setLocation( final int lineNumber, final int columnNumber )
546    {
547        m_Locator.setLineNumber( lineNumber );
548        m_Locator.setColumnNumber( columnNumber );
549    }   //  setLocation()
550
551    /**
552     *  Sets the value of a property. Usually, the property name is any
553     *  fully-qualified URI. It is possible for an XMLReader to recognize a
554     *  property name but to be unable to change the current value. Some
555     *  property values may be immutable or mutable only in specific contexts,
556     *  such as before, during, or after a parse.<br>
557     *  <br>XMLReaders are not required to recognize setting any specific
558     *  property names, though a core set is defined by SAX2. But only this is
559     *  not implemented by this implementation.<br>
560     *  <br>This method is also the standard mechanism for setting extended
561     *  handlers.
562     *
563     *  @param name The property name, which is a fully-qualified URI.
564     *  @param value    The requested value for the property.
565     *  @throws SAXNotRecognizedException   The property value can't be
566     *      assigned or retrieved.
567     *  @throws SAXNotSupportedException    The XMLReader recognises the
568     *      property name but cannot set the requested value.
569     *
570     *  @see #getProperty(String) getProperty()
571     */
572    @Override
573    public void setProperty( final String name, final Object value ) throws SAXNotRecognizedException, SAXNotSupportedException
574    {
575        requireNonNullArgument( name, "name" );
576        if( nonNull( value ) )
577        {
578            m_Properties.put( name, value );
579        }
580        else
581        {
582            m_Properties.remove( name );
583        }
584    }   //  setProperty()
585
586    /**
587     *  Sets the public id to the locator.
588     *
589     *  @param  publicId    The value for the public id; may be {@code null}.
590     */
591    protected final void setPublicId( final String publicId ) { m_Locator.setPublicId( publicId ); }
592
593    /**
594     *  Sets the system id to the locator.
595     *
596     *  @param  systemId    The value for the system id; may be {@code null}.
597     */
598    protected final void setSystemId( final String systemId ) { m_Locator.setSystemId( systemId ); }
599}
600//  class AbstractXMLReader
601
602/*
603 *  End of File
604 */