001/*
002 * ============================================================================
003 * Copyright © 2002-2023 by Thomas Thrien.
004 * All Rights Reserved.
005 * ============================================================================
006 *
007 * Licensed to the public under the agreements of the GNU Lesser General Public
008 * License, version 3.0 (the "License"). You may obtain a copy of the License at
009 *
010 *      http://www.gnu.org/licenses/lgpl.html
011 *
012 * Unless required by applicable law or agreed to in writing, software
013 * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
014 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
015 * License for the specific language governing permissions and limitations
016 * under the License.
017 */
018
019package org.tquadrat.foundation.xml.stringconverter;
020
021import static java.lang.String.format;
022import static org.apiguardian.api.API.Status.STABLE;
023import static org.tquadrat.foundation.lang.CommonConstants.UTF8;
024import static org.tquadrat.foundation.lang.Objects.isNull;
025import static org.tquadrat.foundation.lang.Objects.nonNull;
026import static org.tquadrat.foundation.util.StringUtils.isNotEmptyOrBlank;
027
028import javax.xml.parsers.DocumentBuilderFactory;
029import javax.xml.parsers.ParserConfigurationException;
030import javax.xml.transform.TransformerConfigurationException;
031import javax.xml.transform.TransformerException;
032import javax.xml.transform.TransformerFactory;
033import javax.xml.transform.dom.DOMSource;
034import javax.xml.transform.stream.StreamResult;
035import java.io.ByteArrayOutputStream;
036import java.io.IOException;
037import java.io.Serial;
038import java.io.StringReader;
039import java.io.UnsupportedEncodingException;
040import java.util.Collection;
041import java.util.List;
042
043import org.apiguardian.api.API;
044import org.tquadrat.foundation.annotation.ClassVersion;
045import org.tquadrat.foundation.exception.UnexpectedExceptionError;
046import org.tquadrat.foundation.lang.StringConverter;
047import org.tquadrat.foundation.xml.parse.NullErrorHandler;
048import org.w3c.dom.Document;
049import org.xml.sax.InputSource;
050import org.xml.sax.SAXException;
051
052/**
053 *  <p>{@summary An implementation of
054 *  {@link StringConverter}
055 *  for
056 *  {@link Document}
057 *  values.}</p>
058 *  <p>The method
059 *  {@link #fromString(CharSequence)}
060 *  parses the input to a document, using the default
061 *  {@link DocumentBuilderFactory}
062 *  to get a
063 *  {@link javax.xml.parsers.DocumentBuilder}.</p>
064 *  <p>{@link #toString(Document)}
065 *  uses an instance of
066 *  {@link javax.xml.transform.Transformer}
067 *  to get a String from the document.</p>
068 *
069 *  @note When converting a String to an instance of {@code Document} back to a
070 *      String, the final String may be different from the input because the
071 *      formatting can differ, and some implicit defaults will now be provided
072 *      as explicit setting. Only the semantics are guaranteed to be still the
073 *      same.
074 *
075 *  @extauthor Thomas Thrien - thomas.thrien@tquadrat.org
076 *  @version $Id: DocumentStringConverter.java 1072 2023-09-30 20:44:38Z tquadrat $
077 *  @since 0.1.0
078 *
079 *  @UMLGraph.link
080 */
081@ClassVersion( sourceVersion = "$Id: DocumentStringConverter.java 1072 2023-09-30 20:44:38Z tquadrat $" )
082@API( status = STABLE, since = "0.1.0" )
083@SuppressWarnings( "exports" )
084public final class DocumentStringConverter implements StringConverter<Document>
085{
086        /*-----------*\
087    ====** Constants **========================================================
088        \*-----------*/
089    /**
090     *  The error message for an invalid XML contents: {@value}.
091     */
092    public static final String MSG_InvalidXML = "Invalid XML: %1$s";
093
094        /*------------------------*\
095    ====** Static Initialisations **===========================================
096        \*------------------------*/
097    /**
098     *  The serial version UID for objects of this class: {@value}.
099     *
100     *  @hidden
101     */
102    @Serial
103    private static final long serialVersionUID = 1L;
104
105    /**
106     *  An instance of this class.
107     */
108    public static final DocumentStringConverter INSTANCE = new DocumentStringConverter();
109
110        /*--------------*\
111    ====** Constructors **=====================================================
112        \*--------------*/
113    /**
114     *  Creates a new {@code DocumentStringConverter} instance.
115     */
116    public DocumentStringConverter() { super(); }
117
118        /*---------*\
119    ====** Methods **==========================================================
120        \*---------*/
121    /**
122     *  {@inheritDoc}
123     */
124    @Override
125    public final Document fromString( final CharSequence source ) throws IllegalArgumentException
126    {
127        Document retValue = null;
128        if( nonNull( source ) )
129        {
130            try
131            {
132                final var documentBuilderFactory = DocumentBuilderFactory.newInstance();
133                documentBuilderFactory.setNamespaceAware( true );
134                documentBuilderFactory.setValidating( false );
135                documentBuilderFactory.setIgnoringComments( false );
136                documentBuilderFactory.setIgnoringElementContentWhitespace( true );
137                documentBuilderFactory.setCoalescing( false );
138                documentBuilderFactory.setExpandEntityReferences( false );
139                final var builder = documentBuilderFactory.newDocumentBuilder();
140                builder.setErrorHandler( NullErrorHandler.INSTANCE );
141                retValue = builder.parse( new InputSource( new StringReader( source.toString() ) ) );
142                retValue.normalizeDocument();
143            }
144            catch( final ParserConfigurationException e )
145            {
146                throw new UnexpectedExceptionError( "Cannot instantiate DocumentBuilder", e );
147            }
148            catch( final SAXException e )
149            {
150                throw new IllegalArgumentException( format( MSG_InvalidXML, source ), e );
151            }
152            catch( final IOException e )
153            {
154                throw new UnexpectedExceptionError( e );
155            }
156        }
157
158        //---* Done *----------------------------------------------------------
159        return retValue;
160    }   //  fromString()
161
162    /**
163     *  Provides the subject class for this converter.
164     *
165     * @return The subject class.
166     */
167    @SuppressWarnings( "PublicMethodNotExposedInInterface" )
168    public final Collection<Class<? extends Document>> getSubjectClass() { return List.of( Document.class ); }
169
170    /**
171     *  This method is used by the
172     *  {@link java.util.ServiceLoader}
173     *  to obtain the instance for this
174     *  {@link org.tquadrat.foundation.lang.StringConverter}
175     *  implementation.
176     *
177     *  @return The instance for this {@code StringConverter} implementation.
178     */
179    public static final DocumentStringConverter provider() { return INSTANCE; }
180
181    /**
182     *  {@inheritDoc}
183     */
184    @SuppressWarnings( "OverlyComplexMethod" )
185    @Override
186    public final String toString( final Document source )
187    {
188        String retValue = null;
189        if( nonNull( source ) )
190        {
191            final var encoding = source.getXmlEncoding();
192            try
193            {
194                //---* Obtain the transformer *--------------------------------
195                final var transformerFactory = TransformerFactory.newInstance();
196                final var transformer = transformerFactory.newTransformer();
197
198                //---* Configure the transformer *-----------------------------
199                transformer.setOutputProperty( "method", "xml" );
200                transformer.setOutputProperty( "indent", "no" );
201                transformer.setOutputProperty( "standalone", source.getXmlStandalone() ? "yes" : "no" );
202                if( nonNull( encoding ) ) transformer.setOutputProperty( "encoding", encoding );
203                final var doctype = source.getDoctype();
204                if( nonNull( doctype) )
205                {
206                    final var systemId = doctype.getSystemId();
207                    if( isNotEmptyOrBlank( systemId ) ) transformer.setOutputProperty( "doctype-system", systemId );
208                    final var publicId = doctype.getPublicId();
209                    if( isNotEmptyOrBlank( publicId ) ) transformer.setOutputProperty( "doctype-public", publicId );
210                }
211
212                final var documentSource = new DOMSource( source );
213                final var outputStream = new ByteArrayOutputStream();
214                final var result = new StreamResult( outputStream );
215                transformer.transform( documentSource, result );
216                retValue = outputStream.toString( isNull( encoding ) ? UTF8.name() : encoding );
217            }
218            catch( final TransformerConfigurationException e )
219            {
220                throw new UnexpectedExceptionError( "Cannot instantiate Transformer", e );
221            }
222            catch( final TransformerException e )
223            {
224                throw new IllegalArgumentException( "Unrecoverable error on transformation", e );
225            }
226            catch( final UnsupportedEncodingException e )
227            {
228                throw new UnexpectedExceptionError( "Invalid encoding: %s".formatted( encoding ), e );
229            }
230        }
231
232        //---* Done *----------------------------------------------------------
233        return retValue;
234    }   //  toString()
235}
236//  class DocumentStringConverter
237
238/*
239 *  End of File
240 */