001/* 002 * ============================================================================ 003 * Copyright © 20002-2026 by Thomas Thrien. 004 * All Rights Reserved. 005 * ============================================================================ 006 * Licensed to the public under the agreements of the GNU Lesser General Public 007 * License, version 3.0 (the "License"). You may obtain a copy of the License at 008 * 009 * http://www.gnu.org/licenses/lgpl.html 010 * 011 * Unless required by applicable law or agreed to in writing, software 012 * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT 013 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the 014 * License for the specific language governing permissions and limitations 015 * under the License. 016 */ 017 018package org.tquadrat.foundation.util; 019 020import org.apiguardian.api.API; 021import org.tquadrat.foundation.annotation.ClassVersion; 022import org.tquadrat.foundation.annotation.UtilityClass; 023import org.tquadrat.foundation.exception.CharSequenceTooLongException; 024import org.tquadrat.foundation.exception.EmptyArgumentException; 025import org.tquadrat.foundation.exception.ImpossibleExceptionError; 026import org.tquadrat.foundation.exception.NullArgumentException; 027import org.tquadrat.foundation.exception.PrivateConstructorForStaticClassCalledError; 028import org.tquadrat.foundation.exception.ValidationException; 029import org.tquadrat.foundation.lang.Objects; 030 031import java.io.IOException; 032import java.io.UnsupportedEncodingException; 033import java.util.ArrayList; 034import java.util.Arrays; 035import java.util.Collection; 036import java.util.List; 037import java.util.Optional; 038import java.util.SequencedCollection; 039import java.util.function.Supplier; 040import java.util.regex.Pattern; 041import java.util.regex.PatternSyntaxException; 042import java.util.stream.Stream; 043import java.util.stream.Stream.Builder; 044 045import static 
java.lang.Character.charCount; 046import static java.lang.Character.isISOControl; 047import static java.lang.Character.isValidCodePoint; 048import static java.lang.Character.isWhitespace; 049import static java.lang.Character.toChars; 050import static java.lang.Character.toLowerCase; 051import static java.lang.Character.toTitleCase; 052import static java.lang.Character.toUpperCase; 053import static java.lang.Integer.min; 054import static java.net.URLDecoder.decode; 055import static java.net.URLEncoder.encode; 056import static java.text.Normalizer.Form.NFD; 057import static java.text.Normalizer.normalize; 058import static java.util.regex.Pattern.DOTALL; 059import static java.util.regex.Pattern.compile; 060import static org.apiguardian.api.API.Status.STABLE; 061import static org.tquadrat.foundation.lang.CommonConstants.CHAR_ELLIPSIS; 062import static org.tquadrat.foundation.lang.CommonConstants.EMPTY_STRING; 063import static org.tquadrat.foundation.lang.CommonConstants.NULL_CHAR; 064import static org.tquadrat.foundation.lang.CommonConstants.UTF8; 065import static org.tquadrat.foundation.lang.Objects.isNull; 066import static org.tquadrat.foundation.lang.Objects.nonNull; 067import static org.tquadrat.foundation.lang.Objects.requireNonNullArgument; 068import static org.tquadrat.foundation.lang.Objects.requireNotEmptyArgument; 069import static org.tquadrat.foundation.util.CharSetUtils.escapeCharacter; 070import static org.tquadrat.foundation.util.StringUtils.Clipping.CLIPPING_ABBREVIATE; 071import static org.tquadrat.foundation.util.StringUtils.Clipping.CLIPPING_ABBREVIATE_MIDDLE; 072import static org.tquadrat.foundation.util.StringUtils.Clipping.CLIPPING_CUT; 073import static org.tquadrat.foundation.util.StringUtils.Clipping.CLIPPING_NONE; 074import static org.tquadrat.foundation.util.StringUtils.Padding.PADDING_CENTER; 075import static org.tquadrat.foundation.util.StringUtils.Padding.PADDING_LEFT; 076import static 
org.tquadrat.foundation.util.StringUtils.Padding.PADDING_RIGHT; 077import static org.tquadrat.foundation.util.internal.Entities.HTML50; 078import static org.tquadrat.foundation.util.internal.Entities.XML; 079 080/** 081 * Library of utility methods that are useful when dealing with Strings. <br> 082 * <br>Parts of the code were adopted from the class 083 * <code>org.apache.commons.lang.StringUtils</code> and modified to match the 084 * requirements of this project. In particular, these are the methods 085 * <ul> 086 * <li>{@link #abbreviate(CharSequence, int) abbreviate()}</li> 087 * <li>{@link #capitalize(CharSequence) capitalize()}</li> 088 * <li>{@link #escapeHTML(CharSequence) escapeHTML()} in both versions</li> 089 * <li>{@link #isEmpty(CharSequence) isEmpty()}</li> 090 * <li>{@link #isNotEmpty(CharSequence) isNotEmpty()}</li> 091 * <li>{@link #repeat(CharSequence, int) repeat()}</li> 092 * <li>{@link #unescapeHTML(CharSequence) unescapeHTML()} in both versions</li> 093 * </ul> 094 * 095 * @extauthor Thomas Thrien - thomas.thrien@tquadrat.org 096 * @version $Id: StringUtils.java 1163 2026-03-20 15:28:33Z tquadrat $ 097 * @since 0.0.3 098 * 099 * @UMLGraph.link 100 */ 101@SuppressWarnings( {"ClassWithTooManyMethods", "OverlyComplexClass"} ) 102@ClassVersion( sourceVersion = "$Id: StringUtils.java 1163 2026-03-20 15:28:33Z tquadrat $" ) 103@UtilityClass 104public final class StringUtils 105{ 106 /*---------------*\ 107 ====** Inner Classes **==================================================== 108 \*---------------*/ 109 /** 110 * The clipping mode that is used for the method 111 * {@link StringUtils#pad(CharSequence,int,char,Padding,Clipping)} 112 * 113 * @extauthor Thomas Thrien - thomas.thrien@tquadrat.org 114 * @version $Id: StringUtils.java 1163 2026-03-20 15:28:33Z tquadrat $ 115 * @since 0.0.3 116 * 117 * @UMLGraph.link 118 */ 119 @SuppressWarnings( "InnerClassTooDeeplyNested" ) 120 @ClassVersion( sourceVersion = "$Id: StringUtils.java 1163 2026-03-20 
15:28:33Z tquadrat $" ) 121 @API( status = STABLE, since = "0.0.5" ) 122 public static enum Clipping 123 { 124 /*------------------*\ 125 ====** Enum Declaration **============================================= 126 \*------------------*/ 127 /** 128 * If an input String is already longer than the target length, it 129 * will be returned unchanged. 130 */ 131 CLIPPING_NONE 132 { 133 /** 134 * {@inheritDoc} 135 */ 136 @Override 137 protected final String clip( final CharSequence input, final int length ) { return input.toString(); } 138 }, 139 140 /** 141 * If an input String is longer than the target length, it will be 142 * just shortened to that length. 143 */ 144 CLIPPING_CUT 145 { 146 /** 147 * {@inheritDoc} 148 */ 149 @Override 150 protected final String clip( final CharSequence input, final int length ) 151 { 152 final var retValue = ( 153 input.length() > length ? input.subSequence( 0, length ) : 154 input).toString(); 155 156 //---* Done *-------------------------------------------------- 157 return retValue; 158 } // clip() 159 }, 160 161 /** 162 * If an input String is longer than the target length, it will be 163 * abbreviated to that length, by calling 164 * {@link StringUtils#abbreviate(CharSequence, int)} 165 * with that String. The minimum length for the padded String is 4. 166 */ 167 CLIPPING_ABBREVIATE 168 { 169 /** 170 * {@inheritDoc} 171 */ 172 @Override 173 protected final String clip( final CharSequence input, final int length ) { return abbreviate( input, length ); } 174 }, 175 176 /** 177 * If an input String is longer than the target length, it will be 178 * abbreviated to that length, by calling 179 * {@link StringUtils#abbreviateMiddle(CharSequence, int)} 180 * with that String. The minimum length for the padded String is 5. 
181 */ 182 CLIPPING_ABBREVIATE_MIDDLE 183 { 184 /** 185 * {@inheritDoc} 186 */ 187 @Override 188 protected final String clip( final CharSequence input, final int length ) { return abbreviateMiddle( input, length ); } 189 }; 190 191 /*---------*\ 192 ====** Methods **====================================================== 193 \*---------*/ 194 /** 195 * Clips the given input String. 196 * 197 * @param input The input String. 198 * @param length The target length. 199 * @return The result String. 200 */ 201 protected abstract String clip( final CharSequence input, final int length ); 202 } 203 // enum Clipping 204 205 /** 206 * The padding mode that is used for the methods 207 * {@link StringUtils#pad(CharSequence,int,char,Padding,boolean)} 208 * and 209 * {@link StringUtils#pad(CharSequence,int,char,Padding,Clipping)} 210 * 211 * @extauthor Thomas Thrien - thomas.thrien@tquadrat.org 212 * @version $Id: StringUtils.java 1163 2026-03-20 15:28:33Z tquadrat $ 213 * @since 0.0.5 214 * 215 * @UMLGraph.link 216 */ 217 @SuppressWarnings( "InnerClassTooDeeplyNested" ) 218 @ClassVersion( sourceVersion = "$Id: StringUtils.java 1163 2026-03-20 15:28:33Z tquadrat $" ) 219 @API( status = STABLE, since = "0.0.5" ) 220 public static enum Padding 221 { 222 /*------------------*\ 223 ====** Enum Declaration **============================================= 224 \*------------------*/ 225 /** 226 * The pad characters are distributed evenly at begin and end of the 227 * string. 
228 */ 229 PADDING_CENTER 230 { 231 /** 232 * {@inheritDoc} 233 */ 234 @Override 235 protected final String pad( final CharSequence input, final int padSize, final char c ) 236 { 237 final var rightSize = padSize / 2; 238 final var leftSize = padSize - rightSize; 239 final var retValue = padding( leftSize, c ) + input.toString() + padding( rightSize, c ); 240 241 //---* Done *-------------------------------------------------- 242 return retValue; 243 } // pad() 244 }, 245 246 /** 247 * The pad characters are added at the beginning of the string 248 * (prefixing it). 249 */ 250 PADDING_LEFT 251 { 252 /** 253 * {@inheritDoc} 254 */ 255 @Override 256 protected final String pad( final CharSequence input, final int padSize, final char c ) 257 { 258 return padding( padSize, c ) + input.toString(); 259 } // pad() 260 }, 261 262 /** 263 * The pad characters are added the end of the string (as a suffix). 264 */ 265 PADDING_RIGHT 266 { 267 /** 268 * {@inheritDoc} 269 */ 270 @Override 271 protected final String pad( final CharSequence input, final int padSize, final char c ) 272 { 273 return input.toString() + padding( padSize, c ); 274 } // pad() 275 }; 276 277 /*---------*\ 278 ====** Methods **====================================================== 279 \*---------*/ 280 /** 281 * Pads the given input String. 282 * 283 * @param input The input String. 284 * @param padSize The pad size. 285 * @param c The pad character. 286 * @return The result String. 287 */ 288 protected abstract String pad( final CharSequence input, final int padSize, final char c ); 289 290 /** 291 * <p>{@summary Returns padding using the specified pad character repeated to the 292 * given length.}</p> 293 * <br><code> 294 * Padding.padding( 0, 'e' ) ⇒ ""<br> 295 * Padding.padding( 3, 'e' ) ⇒ "eee"<br> 296 * Padding.padding( -2, 'e' ) ⇒ IndexOutOfBoundsException<br> 297 * </code> 298 * 299 * @param repeat Number of times to repeat {@code padChar}; must be 300 * 0 or greater. 
301 * @param padChar Character to repeat. 302 * @return String with repeated {@code padChar} character, or the 303 * empty String if {@code repeat} is 0. 304 * @throws IndexOutOfBoundsException {@code repeat} is less than 0. 305 * 306 * @see StringUtils#repeat(int,int) 307 */ 308 private static String padding( final int repeat, final char padChar ) throws IndexOutOfBoundsException 309 { 310 if( repeat < 0 ) throw new IndexOutOfBoundsException( MSG_PadNegative.formatted( repeat ) ); 311 312 final var retValue = Character.toString( padChar ).repeat( repeat ).intern(); 313 314 //---* Done *------------------------------------------------------ 315 return retValue; 316 } // padding() 317 } 318 // enum Padding 319 320 /*-----------*\ 321 ====** Constants **======================================================== 322 \*-----------*/ 323 /** 324 * <p>{@summary The regular expression for an HTML or XML comment: 325 * {@value}.}</p> 326 * <p>This pattern is used by the 327 * {@link #stripXMLComments(CharSequence)} 328 * method.</p> 329 * 330 * @since 0.0.5 331 */ 332 @API( status = STABLE, since = "0.0.5" ) 333 public static final String COMMENTREMOVAL_PATTERN = "<!--.+?-->"; 334 335 /** 336 * The message text indicating that the given value for the abbreviation 337 * target length is too short. 338 */ 339 private static final String MSG_AbbrTooShort = "The minimum abbreviation width is %d"; 340 341 /** 342 * The message indicating that the give size for padding is negative. 343 */ 344 private static final String MSG_PadNegative = "Cannot pad a negative amount: %d"; 345 346 /** 347 * The maximum size to which the padding constant(s) can expand: {@value}. 
348 * 349 * @see #repeat(CharSequence,int) repeat() for String 350 * @see #repeat(char,int) repeat() for char 351 */ 352 @SuppressWarnings( "unused" ) 353 private static final int PAD_LIMIT = 8192; 354 355 /** 356 * The regular expression for an HTML or XML tag: {@value}.<br> 357 * <br>This pattern is used by the 358 * {@link #stripTags(CharSequence)} 359 * method.<br> 360 * <br>As HTML/XML comments may contain a "greater than" sign 361 * ('>' or '&gt;'), it is necessary to treat comments 362 * separately.<br> 363 * <br>Just as a reminder: several sources recommend using the following 364 * idiom for embedded JavaScript:<pre><code><script> 365 * <!-- 366 * <i>JavaScript code </i> 367 * --> 368 * </script></code></pre> 369 * 370 * @since 0.0.5 371 */ 372 @SuppressWarnings( "RegExpUnnecessaryNonCapturingGroup" ) 373 @API( status = STABLE, since = "0.0.5" ) 374 public static final String TAGREMOVAL_PATTERN = "(?:<!--.+?-->)|(?:<[^>]+?>)"; 375 376 /*------------------------*\ 377 ====** Static Initialisations **=========================================== 378 \*------------------------*/ 379 /** 380 * The pattern that is used to identify an HTML or XML comment. 381 * 382 * @see #stripXMLComments(CharSequence) 383 * @see #COMMENTREMOVAL_PATTERN 384 */ 385 private static final Pattern m_CommentRemovalPattern; 386 387 /** 388 * The pattern that is used to identify an HTML or XML tag. 
389 * 390 * @see #stripTags(CharSequence) 391 * @see #TAGREMOVAL_PATTERN 392 */ 393 private static final Pattern m_TagRemovalPattern; 394 395 static 396 { 397 //---* The regex patterns *-------------------------------------------- 398 try 399 { 400 m_CommentRemovalPattern = compile( COMMENTREMOVAL_PATTERN, DOTALL ); 401 m_TagRemovalPattern = compile( TAGREMOVAL_PATTERN, DOTALL ); 402 } 403 catch( final PatternSyntaxException e ) 404 { 405 throw new ImpossibleExceptionError( "The patterns are constant values that have been tested", e ); 406 } 407 } 408 409 /*--------------*\ 410 ====** Constructors **===================================================== 411 \*--------------*/ 412 /** 413 * No instance of this class is allowed. 414 */ 415 private StringUtils() { throw new PrivateConstructorForStaticClassCalledError( StringUtils.class ); } 416 417 /*---------*\ 418 ====** Methods **========================================================== 419 \*---------*/ 420 /** 421 * <p>{@summary Abbreviates a String using ellipses (Unicode HORIZONTAL 422 * ELLIPSIS, 0x2026).} This will turn "<i>Now is the time for all 423 * good men</i>" into "<i>Now is the time for…</i>".</p> 424 * <p>Specifically:</p> 425 * <ul> 426 * <li>If {@code text} is less than {@code maxWidth} characters long, 427 * return it unchanged.</li> 428 * <li>Else abbreviate it to <code>(substring( text, 0, max - 1 ) + 429 * "…" )</code>.</li> 430 * <li>If {@code maxWidth} is less than 4, throw an 431 * {@link ValidationException}.</li> 432 * <li>In no case it will return a String of length greater than 433 * {@code maxWidth}.</li> 434 * </ul> 435 * <p>Some samples:</p> 436 * <pre><code> 437 * StringUtils.abbreviate( null, * ) = null 438 * StringUtils.abbreviate( "", 4 ) = "" 439 * StringUtils.abbreviate( "abc", 4 ) = "abc" 440 * StringUtils.abbreviate( "abcd", 4 ) = "abcd;" 441 * StringUtils.abbreviate( "abcdefg", 4 ) = "abc…" 442 * StringUtils.abbreviate( "abcdefg", 7 ) = "abcdefg" 443 * 
StringUtils.abbreviate( "abcdefg", 8 ) = "abcdefg" 444 * StringUtils.abbreviate( "abcdefg", 3 ) = IllegalArgumentException 445 * </code></pre> 446 * 447 * @param text The String to abbreviate, can be {@code null}. 448 * @param maxWidth The maximum length of result String, must be at 449 * least 4. 450 * @return The abbreviated String, or {@code null} if the input was 451 * already {@code null}. 452 * @throws ValidationException The value for {@code maxWidth} was less 453 * than 4. 454 * 455 * @since 0.0.5 456 */ 457 @API( status = STABLE, since = "0.0.5" ) 458 public static final String abbreviate( final CharSequence text, final int maxWidth ) throws ValidationException 459 { 460 return abbreviate( text, 0, maxWidth ); 461 } // abbreviate() 462 463 /** 464 * Abbreviates a String using ellipses (Unicode HORIZONTAL ELLIPSIS, 465 * 0x2026). This will turn "<i>Now is the time for all good 466 * men</i>" into "<i>…is the time 467 * for…</i>".<br> 468 * <br>Works like 469 * {@link #abbreviate(CharSequence, int)}, 470 * but allows to specify a "left edge" offset. Note that this 471 * left edge is not necessarily going to be the leftmost character in the 472 * result, or the first character following the ellipses, but it will 473 * appear somewhere in the result. 
An offset less than 0 will be treated 474 * as 0, a value greater than {@code maxWidth} will be ignored.<br> 475 * <br>In no case will it return a String of length greater than 476 * {@code maxWidth}.<br> 477 * <br>Some samples:<br> 478 * <pre> 479 * StringUtils.abbreviate( null, *, * ) = null 480 * StringUtils.abbreviate( "", 0, 4 ) = "" 481 * StringUtils.abbreviate( "abcdefghijklmno", -1, 10 ) = "abcdefghi…" 482 * StringUtils.abbreviate( "abcdefghijklmno", 0, 10 ) = "abcdefghi…" 483 * StringUtils.abbreviate( "abcdefghijklmno", 1, 10 ) = "abcdefghi…" 484 * StringUtils.abbreviate( "abcdefghijklmno", 4, 10 ) = "…efghijkl…" 485 * StringUtils.abbreviate( "abcdefghijklmno", 5, 10 ) = "…fghijklm…" 486 * StringUtils.abbreviate( "abcdefghijklmno", 6, 10 ) = "…ghijklmno" 487 * StringUtils.abbreviate( "abcdefghijklmno", 8, 10 ) = "…ghijklmno" 488 * StringUtils.abbreviate( "abcdefghijklmno", 10, 10 ) = "…ghijklmno" 489 * StringUtils.abbreviate( "abcdefghijklmno", 12, 10 ) = "…ghijklmno" 490 * StringUtils.abbreviate( "abcdefghij", 0, 3 ) = IllegalArgumentException 491 * StringUtils.abbreviate( "abcdefghij", 5, 6 ) = IllegalArgumentException 492 * </pre> 493 * 494 * @param text The String to process, can be {@code null}. 495 * @param offset The left edge of the source String; this value will not 496 * be checked. 497 * @param maxWidth The maximum length of result String, must be at 498 * least 4. 499 * @return The abbreviated String, or {@code null} if the input was 500 * already {@code null}. 501 * @throws ValidationException The value for {@code maxWidth} was less 502 * than 4. 
503 * 504 * @since 0.0.5 505 */ 506 @API( status = STABLE, since = "0.0.5" ) 507 public static final String abbreviate( final CharSequence text, final int offset, final int maxWidth ) throws ValidationException 508 { 509 final var ellipsis = Character.toString( CHAR_ELLIPSIS ).intern(); 510 511 String retValue = null; 512 if( nonNull( text ) ) 513 { 514 if( maxWidth < 4 ) throw new ValidationException( String.format( MSG_AbbrTooShort, 4 ) ); 515 516 final var len = text.length(); 517 if( len > maxWidth ) 518 { 519 var effectiveOffset = min( offset, len); 520 if( (len - effectiveOffset) < (maxWidth - 1)) 521 { 522 effectiveOffset = len - (maxWidth - 1); 523 } 524 if( effectiveOffset <= 1 ) 525 { 526 retValue = text.subSequence( 0, maxWidth - 1 ) + ellipsis; 527 } 528 else 529 { 530 if( ((effectiveOffset + maxWidth) - 1) < len ) 531 { 532 retValue = ellipsis + abbreviate( text.subSequence( effectiveOffset, len ), maxWidth - 1 ); 533 } 534 else 535 { 536 retValue = ellipsis + text.subSequence( len - (maxWidth - 1), len ); 537 } 538 } 539 } 540 else 541 { 542 retValue = text.toString(); 543 } 544 } 545 546 //---* Done *---------------------------------------------------------- 547 return retValue; 548 } // abbreviate() 549 550 /** 551 * <p>{@summary Abbreviates a String using ellipses (Unicode HORIZONTAL 552 * ELLIPSIS, 0x2026) in the middle of the returned text.} This will turn 553 * "<i>Now is the time for all good men</i>" into "<i>Now 554 * is … good men</i>".</p> 555 * <p>Works like 556 * {@link #abbreviate(CharSequence, int)}.</p> 557 * <p>In no case will it return a String of length greater than 558 * {@code maxWidth}.</p> 559 * <p>Some samples:</p> 560 * <pre> 561 * StringUtils.abbreviateMiddle(null, *) = null 562 * StringUtils.abbreviateMiddle("", 5) = "" 563 * StringUtils.abbreviateMiddle("abcdefgh", 5) = "ab…gh" 564 * StringUtils.abbreviateMiddle("abcdefgh", 7) = "ab…gh" 565 * StringUtils.abbreviateMiddle("abcdefgh", 8) = "abcdefgh" 566 * 
StringUtils.abbreviateMiddle("abcdefgh", 4) = IllegalArgumentException 567 * </pre> 568 * 569 * @param input The String to check, may be {@code null}. 570 * @param maxWidth The maximum length of result String, must be at 571 * least 5. 572 * @return The abbreviated String, or {@code null} if the input was 573 * already {@code null}. 574 * @throws ValidationException The value for {@code maxWidth} was less 575 * than 5. 576 * 577 * @since 0.0.5 578 */ 579 @API( status = STABLE, since = "0.0.5" ) 580 public static final String abbreviateMiddle( final CharSequence input, final int maxWidth ) 581 { 582 final var ellipsis = Character.toString( CHAR_ELLIPSIS ).intern(); 583 584 String retValue = null; 585 if( nonNull( input ) ) 586 { 587 if( maxWidth < 5 ) throw new ValidationException( String.format( MSG_AbbrTooShort, 5 ) ); 588 589 final var len = input.length(); 590 if( len > maxWidth ) 591 { 592 final var suffixLength = (maxWidth - 1) / 2; 593 final var prefixLength = maxWidth - 1 - suffixLength; 594 final var suffixStart = len - suffixLength; 595 retValue = input.subSequence( 0, prefixLength ) + ellipsis + input.subSequence( suffixStart, suffixStart + suffixLength ); 596 } 597 else 598 { 599 retValue = input.toString(); 600 } 601 } 602 603 //---* Done *---------------------------------------------------------- 604 return retValue; 605 } // abbreviateMiddle() 606 607 /** 608 * <p>{@summary Breaks a long string into chunks of the given length.}</p> 609 * <p>This method returns an instance of 610 * {@link Stream} that can be easily converted into an array or a 611 * collection.</p> 612 * <p>To array:</p> 613 * <div class="source-container"><pre>breakString( <<i>string</i>>, <<i>chunk</i>> ).toArray( String []::new )</pre></div> 614 * <p>To collection (here: a 615 * {@link List}):</p> 616 * <div class="source-container"><pre>breakString( <<i>string</i>>, <<i>chunk</i>> ).collect( Collectors.toList() )</pre></div> 617 * 618 * @param input The string. 
619 * @param chunk The chunk size. 620 * @return The chunks from the string; the last chunk could be shorter 621 * than the others. 622 * 623 * @see Stream#toArray(java.util.function.IntFunction) 624 * @see Stream#collect(java.util.stream.Collector) 625 * @see java.util.stream.Collectors#toList() 626 * 627 * @since 0.0.5 628 */ 629 @API( status = STABLE, since = "0.0.5" ) 630 public static final Stream<String> breakString( final CharSequence input, final int chunk ) 631 { 632 if( chunk < 1 ) throw new ValidationException( "Chunk size must not be zero or a negative number: %d".formatted( chunk ) ); 633 634 final Builder<String> builder = Stream.builder(); 635 final var len = requireNonNullArgument( input, "input" ).length(); 636 var pos = 0; 637 while( (pos + chunk) < len ) 638 { 639 builder.add( input.subSequence( pos, pos + chunk ).toString() ); 640 pos += chunk; 641 } 642 if( pos < len ) builder.add( input.subSequence( pos, len ).toString() ); 643 644 final var retValue = builder.build(); 645 646 //---* Done *---------------------------------------------------------- 647 return retValue; 648 } // breakString() 649 650 /** 651 * <p>{@summary Breaks a text into lines of the given length, but 652 * different from 653 * {@link #breakString(CharSequence, int)}, 654 * it will honour whitespace.}</p> 655 * <p>This method returns an instance of 656 * {@link Stream} that can be easily converted into an array, a String, or 657 * a collection.</p> 658 * <p>To array:</p> 659 * <div class="source-container"><pre>breakText( <<i>text</i>>, <<i>len</i>> ).toArray( String []::new )</pre></div> 660 * <p>To String:</p> 661 * <div class="source-container"><pre>breakText( <<i>text</i>>, <<i>len</i>> ).collect( Collectors.joining() )</pre></div> 662 * <p>To collection (here: a 663 * {@link List}):</p> 664 * <div class="source-container"><pre>breakText( <<i>text</i>>, <<i>len</i>> ).collect( Collectors.toList() )</pre></div> 665 * 666 * @param text The text. 
667 * @param lineLength The length of a line. 668 * @return The lines; if a word is longer than the given line length, a 669 * line containing only that word can be longer that the given line 670 * length. 671 * 672 * @see Stream#toArray(java.util.function.IntFunction) 673 * @see Stream#collect(java.util.stream.Collector) 674 * @see java.util.stream.Collectors#joining() 675 * @see java.util.stream.Collectors#joining(CharSequence) 676 * @see java.util.stream.Collectors#joining(CharSequence, CharSequence, CharSequence) 677 * @see java.util.stream.Collectors#toList() 678 * 679 * @since 0.0.5 680 */ 681 @API( status = STABLE, since = "0.0.5" ) 682 public static final Stream<String> breakText( final CharSequence text, final int lineLength ) 683 { 684 if( lineLength < 1 ) throw new ValidationException( "Line length size must not be zero or a negative number: %d".formatted( lineLength ) ); 685 686 final Builder<String> builder = Stream.builder(); 687 688 for( final var line : splitString( requireNonNullArgument( text, "text" ), '\n' ) ) 689 { 690 if( isEmptyOrBlank( line ) ) 691 { 692 builder.add( EMPTY_STRING ); 693 } 694 else 695 { 696 final var buffer = new StringBuilder(); 697 final var chunks = line.split( "\\s" ); 698 SplitLoop: for( final var chunk : chunks ) 699 { 700 if( chunk.isEmpty() ) continue SplitLoop; 701 if( (buffer.length() + 1 + chunk.length()) < lineLength ) 702 { 703 if( isNotEmpty( buffer) ) buffer.append( ' ' ); 704 } 705 else 706 { 707 if( isNotEmpty( buffer ) ) 708 { 709 builder.add( buffer.toString() ); 710 buffer.setLength( 0 ); 711 } 712 } 713 buffer.append( chunk ); 714 } // SplitLoop: 715 if( isNotEmpty( buffer ) ) builder.add( buffer.toString() ); 716 } 717 } 718 719 final var retValue = builder.build(); 720 721 //---* Done *---------------------------------------------------------- 722 return retValue; 723 } // breakText() 724 725 /** 726 * <p>{@summary <i>Capitalises</i> a String, meaning changing the first 727 * letter to upper case as 
per 728 * {@link Character#toUpperCase(char)}.} No other letters are changed.</p> 729 * <p>A {@code null} input String returns {@code null}.</p> 730 * <p>Samples:</p> 731 * <pre><code> 732 * StringUtils.capitalize( null ) == null; 733 * StringUtils.capitalize( "" ) == ""; 734 * StringUtils.capitalize( "cat" ) == "Cat"; 735 * StringUtils.capitalize( "cAt" ) == "CAt";</code></pre> 736 * <p>Use this function to create a getter or setter name from the name of 737 * the attribute.</p> 738 * <p>This method does not recognise the 739 * {@linkplain java.util.Locale#getDefault() default locale}. 740 * This means that "istanbul" will become "Istanbul" 741 * even for the locale {@code tr_TR} (although "İstanbul" 742 * would be correct).</p> 743 * 744 * @param input The String to capitalise, can be {@code null}. 745 * @return The capitalised String, or {@code null} if the argument 746 * was already {@code null}. 747 * 748 * @see #decapitalize(CharSequence) 749 * 750 * @since 0.0.5 751 */ 752 @API( status = STABLE, since = "0.0.5" ) 753 public static final String capitalize( final CharSequence input ) 754 { 755 String retValue = null; 756 if( isNotEmpty( input ) ) 757 { 758 final var str = input.toString(); 759 final var firstCodePoint = str.codePointAt( 0 ); 760 final var newCodePoint = toUpperCase( firstCodePoint ); 761 if( firstCodePoint == newCodePoint ) 762 { 763 retValue = str; 764 } 765 else 766 { 767 final var strLen = str.length(); 768 final var newCodePoints = new int [strLen]; 769 var outOffset = 0; 770 newCodePoints [outOffset++] = newCodePoint; 771 //noinspection ForLoopWithMissingComponent 772 for( var inOffset = charCount( firstCodePoint ); inOffset < strLen; ) 773 { 774 final var codePoint = str.codePointAt( inOffset ); 775 newCodePoints [outOffset++] = codePoint; 776 inOffset += charCount( codePoint ); 777 } 778 retValue = new String( newCodePoints, 0, outOffset ); 779 } 780 } 781 else if( nonNull( input ) ) 782 { 783 retValue = EMPTY_STRING; 784 } 785 786 
//---* Done *---------------------------------------------------------- 787 return retValue; 788 } // capitalize() 789 790 /** 791 * <p>{@summary <i>Capitalises</i> a String, meaning changing the first 792 * letter to upper case as per 793 * {@link Character#toTitleCase(char)}.} No other letters are changed.</p> 794 * <p>A {@code null} input String returns {@code null}.</p> 795 * <p>Samples:</p> 796 * <pre><code> 797 * StringUtils.capitalize( null ) == null; 798 * StringUtils.capitalize( "" ) == ""; 799 * StringUtils.capitalize( "cat" ) == "Cat"; 800 * StringUtils.capitalize( "cAt" ) == "CAt";</code></pre> 801 * <p>Use this function to create a getter or setter name from the name of 802 * the attribute.</p> 803 * <p>This method does not recognise the 804 * {@linkplain java.util.Locale#getDefault() default locale}. 805 * This means that "istanbul" will become "Istanbul" 806 * even for the locale {@code tr_TR} (although "İstanbul" 807 * would be correct).</p> 808 * 809 * @param input The String to capitalise, can be {@code null}. 810 * @return The capitalised String, or {@code null} if the argument 811 * was already {@code null}. 
812 * 813 * @see #capitalize(CharSequence) 814 * @see #decapitalize(CharSequence) 815 * 816 * @since 0.4.8 817 */ 818 @API( status = STABLE, since = "0.4.8" ) 819 public static final String capitalizeToTitle( final CharSequence input ) 820 { 821 String retValue = null; 822 if( isNotEmpty( input ) ) 823 { 824 final var str = input.toString(); 825 final var firstCodePoint = str.codePointAt( 0 ); 826 final var newCodePoint = toTitleCase( firstCodePoint ); 827 if( firstCodePoint == newCodePoint ) 828 { 829 retValue = str; 830 } 831 else 832 { 833 final var strLen = str.length(); 834 final var newCodePoints = new int [strLen]; 835 var outOffset = 0; 836 newCodePoints [outOffset++] = newCodePoint; 837 //noinspection ForLoopWithMissingComponent 838 for( var inOffset = charCount( firstCodePoint ); inOffset < strLen; ) 839 { 840 final var codePoint = str.codePointAt( inOffset ); 841 newCodePoints [outOffset++] = codePoint; 842 inOffset += charCount( codePoint ); 843 } 844 retValue = new String( newCodePoints, 0, outOffset ); 845 } 846 } 847 else if( nonNull( input ) ) 848 { 849 retValue = EMPTY_STRING; 850 } 851 852 //---* Done *---------------------------------------------------------- 853 return retValue; 854 } // capitalizeToTitle() 855 856 /** 857 * Tests if the given text is not {@code null}, not empty and not 858 * longer than the given maximum length. Use this to check whether a 859 * String that is provided as an argument to a method is longer than 860 * expected. 861 * 862 * @param name The name that should appear in the exception if one 863 * will be thrown. Usually this is the name of the argument to 864 * validate. 865 * @param text The text to check. 866 * @param maxLength The maximum length. 867 * @return Always the contents of <code>text</code> as a String; if the 868 * argument fails any of the tests, an 869 * {@link IllegalArgumentException} 870 * or an exception derived from that will be thrown. 
871 * @throws CharSequenceTooLongException {@code text} is longer than 872 * {@code maxLength}. 873 * @throws EmptyArgumentException Either {@code name} or {@code text} is 874 * the empty String. 875 * @throws NullArgumentException Either {@code name} or {@code text} is 876 * {@code null}. 877 * 878 * @since 0.0.5 879 */ 880 @API( status = STABLE, since = "0.0.5" ) 881 public static final String checkTextLen( final String name, final CharSequence text, final int maxLength ) throws CharSequenceTooLongException, EmptyArgumentException, NullArgumentException 882 { 883 if( requireNotEmptyArgument( text, requireNotEmptyArgument( name, "name" ) ).length() > maxLength ) 884 { 885 throw new CharSequenceTooLongException( name, maxLength ); 886 } 887 888 //---* Done *---------------------------------------------------------- 889 return text.toString(); 890 } // checkTextLen() 891 892 /** 893 * Tests if the given text is not longer than the given maximum length; 894 * different from 895 * {@link #checkTextLen(String, CharSequence, int)}, 896 * it may be {@code null} or empty. 897 * 898 * @param name The name that should appear in the exception if one 899 * will be thrown. 900 * @param text The text to check; may be {@code null}. 901 * @param maxLength The maximum length. 902 * @return Always the contents of {@code text} as a String, {@code null} 903 * if {@code text} was {@code null}; if the argument fails any of the 904 * tests, an 905 * {@link IllegalArgumentException} 906 * or an exception derived from that will be thrown. 907 * @throws CharSequenceTooLongException {@code text} is longer than 908 * {@code maxLength}. 909 * @throws EmptyArgumentException {@code name} is empty. 910 * @throws NullArgumentException {@code name} is {@code null}. 
911 * 912 * @since 0.0.5 913 */ 914 @API( status = STABLE, since = "0.0.5" ) 915 public static final String checkTextLenNull( final String name, final CharSequence text, final int maxLength ) throws CharSequenceTooLongException 916 { 917 requireNotEmptyArgument( name, "name" ); 918 919 String retValue = null; 920 if( nonNull( text ) ) 921 { 922 if( text.length() > maxLength ) 923 { 924 throw new CharSequenceTooLongException( name, maxLength ); 925 } 926 retValue = text.toString(); 927 } 928 929 //---* Done *---------------------------------------------------------- 930 return retValue; 931 } // checkTextLenNull() 932 933 /** 934 * <p>{@summary Changes the first letter of the given String to lower case 935 * as per 936 * {@link Character#toLowerCase(char)}.} 937 * No other letters are changed. A {@code null} input String returns 938 * {@code null}.</p> 939 * <p>Samples:</p> 940 * <pre><code> 941 * StringUtils.decapitalize( null ) = null; 942 * StringUtils.decapitalize("") = ""; 943 * StringUtils.decapitalize("Cat") = "cat"; 944 * StringUtils.decapitalize("CAT") = "cAT";</code></pre> 945 * <p>Basically, this is the complementary method to 946 * {@link #capitalize(CharSequence)}.</p> 947 * <p>Use this method to normalise the name of bean attributes.</p> 948 * 949 * @param input The String to <i>decapitalise</i>, may be {@code null}. 950 * @return The <i>decapitalised</i> String, {@code null} if the argument 951 * was {@code null}. 
952 * @see #capitalize(CharSequence) 953 * 954 * @since 0.0.5 955 */ 956 @API( status = STABLE, since = "0.1.0" ) 957 public static final String decapitalize( final CharSequence input ) 958 { 959 String retValue = null; 960 if( isNotEmpty( input ) ) 961 { 962 final var str = input.toString(); 963 final var firstCodePoint = str.codePointAt( 0 ); 964 final var newCodePoint = toLowerCase( firstCodePoint ); 965 if( firstCodePoint == newCodePoint ) 966 { 967 retValue = str; 968 } 969 else 970 { 971 final var strLen = str.length(); 972 final var newCodePoints = new int [strLen]; 973 var outOffset = 0; 974 newCodePoints [outOffset++] = newCodePoint; 975 //noinspection ForLoopWithMissingComponent 976 for( var inOffset = charCount( firstCodePoint ); inOffset < strLen; ) 977 { 978 final var codePoint = str.codePointAt( inOffset ); 979 newCodePoints [outOffset++] = codePoint; 980 inOffset += charCount( codePoint ); 981 } 982 retValue = new String( newCodePoints, 0, outOffset ); 983 } 984 } 985 else if( nonNull( input ) ) 986 { 987 retValue = EMPTY_STRING; 988 } 989 990 //---* Done *---------------------------------------------------------- 991 return retValue; 992 } // decapitalize() 993 994 /** 995 * <p>{@summary Escapes the non-ASCII and special characters in a 996 * {@code String} so that the result can be used in the context of HTML.} 997 * Wherever possible, the method will return the respective HTML 5 998 * entity; only when there is no matching entity, it will use the Unicode 999 * escape.</p> 1000 * <p>So if you call the method with the argument 1001 * "<i>Süße</i>", it will return 1002 * "<code>S&uuml;&szlig;e</code>".</p> 1003 * <p>If the input will be, for example, a Chinese text like this: 1004 * "<i>球体</i>" (means "Ball"), you may get back something like 1005 * this: "<code>&#x7403;&#x4F53;</code>", as there are 1006 * no entities defined for (any) Chinese letters.</p> 1007 * <p>The method supports all known HTML 5.0 entities, including 1008 * funky accents. 
But it will not escape several commonly used characters 1009 * like the full stop ('.'), the comma (','), the colon (':'), or the 1010 * semicolon (';'), although they will be handled properly by 1011 * {@link #unescapeHTML(CharSequence)}.</p> 1012 * <p>Note that the commonly used apostrophe escape character 1013 * (&apos;) that was not a legal entity for HTML before HTML 5 is 1014 * now supported.</p> 1015 * 1016 * @param input The {@code String} to escape, may be {@code null}. 1017 * @return A new escaped {@code String}, or {@code null} if the 1018 * argument was already {@code null}. 1019 * 1020 * @see #unescapeHTML(CharSequence) 1021 * @see <a href="http://hotwired.lycos.com/webmonkey/reference/special_characters/">ISO Entities</a> 1022 * @see <a href="http://www.w3.org/TR/REC-html32#latin1">HTML 3.2 Character Entities for ISO Latin-1</a> 1023 * @see <a href="http://www.w3.org/TR/REC-html40/sgml/entities.html">HTML 4.0 Character entity references</a> 1024 * @see <a href="http://www.w3.org/TR/html401/charset.html#h-5.3">HTML 4.01 Character References</a> 1025 * @see <a href="http://www.w3.org/TR/html401/charset.html#code-position">HTML 4.01 Code positions</a> 1026 * 1027 * @since 0.0.5 1028 */ 1029 /* 1030 * For some unknown reasons, Javadoc will not accept the entities 球 1031 * and 体 (for '球' and '体'), therefore it was required to add the 1032 * Chinese characters directly into the comment above. 1033 */ 1034 @API( status = STABLE, since = "0.0.5" ) 1035 public static final String escapeHTML( final CharSequence input ) 1036 { 1037 final var retValue = nonNull( input ) ? HTML50.escape( input ) : null; 1038 1039 //---* Done *---------------------------------------------------------- 1040 return retValue; 1041 } // escapeHTML() 1042 1043 /** 1044 * Escapes the characters in a {@code String} using HTML entities and 1045 * writes them to an 1046 * {@link Appendable}. 1047 * For details, refer to 1048 * {@link #escapeHTML(CharSequence)}. 
1049 * 1050 * @param appendable The appendable object receiving the escaped string. 1051 * @param input The {@code String} to escape, may be {@code null}. 1052 * @throws NullArgumentException The appendable is {@code null}. 1053 * @throws IOException when {@code Appendable} passed throws the exception 1054 * from calls to the 1055 * {@link Appendable#append(char)} 1056 * method. 1057 * 1058 * @see #escapeHTML(CharSequence) 1059 * @see #unescapeHTML(CharSequence) 1060 * @see <a href="http://hotwired.lycos.com/webmonkey/reference/special_characters/">ISO Entities</a> 1061 * @see <a href="http://www.w3.org/TR/REC-html32#latin1">HTML 3.2 Character Entities for ISO Latin-1</a> 1062 * @see <a href="http://www.w3.org/TR/REC-html40/sgml/entities.html">HTML 4.0 Character entity references</a> 1063 * @see <a href="http://www.w3.org/TR/html401/charset.html#h-5.3">HTML 4.01 Character References</a> 1064 * @see <a href="http://www.w3.org/TR/html401/charset.html#code-position">HTML 4.01 Code positions</a> 1065 * 1066 * @since 0.0.5 1067 */ 1068 @API( status = STABLE, since = "0.0.5" ) 1069 public static final void escapeHTML( final Appendable appendable, final CharSequence input ) throws IOException 1070 { 1071 requireNonNullArgument( appendable, "appendable" ); 1072 1073 if( nonNull( input ) ) HTML50.escape( appendable, input ); 1074 } // escapeHTML() 1075 1076 /** 1077 * Formats the given {@code String} for the output into JSONText. This 1078 * means that the input sequence will be surrounded by double quotes, and 1079 * backslash sequences are put into all the right places.<br> 1080 * <br>< and > will be inserted as their Unicode values, allowing 1081 * JSON text to be delivered in HTML.<br> 1082 * <br>In JSON text, a string cannot contain a control character or an 1083 * unescaped quote or backslash, so these are translated to Unicode 1084 * escapes also. 1085 * 1086 * @param input The string to escape to the JSON format; it may be 1087 * empty, but not {@code null}. 
1088 * @return A string correctly formatted for insertion in a JSON text. 1089 * 1090 * @since 0.0.5 1091 */ 1092 @SuppressWarnings( "OverlyComplexMethod" ) 1093 @API( status = STABLE, since = "0.0.5" ) 1094 public static final String escapeJSON( final CharSequence input ) 1095 { 1096 var retValue = "\"\""; // The JSON empty string. 1097 final var len = requireNonNullArgument( input, "input" ).length(); 1098 if( len > 0 ) 1099 { 1100 final var buffer = new StringBuilder( len * 2 ).append( '"' ); 1101 char c; 1102 for( var i = 0; i < len; ++i ) 1103 { 1104 c = input.charAt( i ); 1105 switch( c ) 1106 { 1107 case '\\', '"', '<', '>', '&' -> buffer.append( escapeCharacter( c ) ); 1108 1109 case '\b' -> buffer.append( "\\b" ); 1110 1111 case '\t' -> buffer.append( "\\t" ); 1112 1113 case '\n'-> buffer.append( "\\n" ); 1114 1115 case '\f' -> buffer.append( "\\f" ); 1116 1117 case '\r' -> buffer.append( "\\r" ); 1118 1119 default -> 1120 { 1121 //noinspection OverlyComplexBooleanExpression,CharacterComparison,UnnecessaryUnicodeEscape 1122 if( (c < ' ') 1123 || ((c >= '\u0080') && (c < '\u00a0')) 1124 || ((c >= '\u2000') && (c < '\u2100')) ) 1125 { 1126 buffer.append( escapeCharacter( c ) ); 1127 } 1128 else 1129 { 1130 buffer.append( c ); 1131 } 1132 } 1133 } 1134 } 1135 buffer.append( '"' ); 1136 retValue = buffer.toString(); 1137 } 1138 1139 //---* Done *---------------------------------------------------------- 1140 return retValue; 1141 } // escapeJSON() 1142 1143 /** 1144 * Escapes the given character using Regex escapes and writes them to a 1145 * {@link Appendable}. 1146 * 1147 * @param appendable The appendable receiving the escaped string. 1148 * @param c The character to escape. 1149 * @throws NullArgumentException The appendable is {@code null}. 1150 * @throws IOException when {@code Appendable} passed throws the exception 1151 * from calls to the 1152 * {@link Appendable#append(CharSequence)} 1153 * method. 
1154 * 1155 * @since 0.0.5 1156 */ 1157 @SuppressWarnings( "SwitchStatementWithTooManyBranches" ) 1158 @API( status = STABLE, since = "0.0.5" ) 1159 public static final void escapeRegex( final Appendable appendable, final char c ) throws IOException 1160 { 1161 requireNonNullArgument( appendable, "appendable" ); 1162 1163 TestSwitch: switch( c ) 1164 { 1165 case '\\' -> appendable.append( "\\" ); 1166 case '[', ']', '{', '}', '(', ')', '^', '$', '&', '*', '.', '+', '|', '?' -> appendable.append( "\\" ).append( c ); 1167 case '\t' -> appendable.append( "\\t" ); 1168 case '\n' -> appendable.append( "\\n" ); 1169 case '\r' -> appendable.append( "\\r" ); 1170 case '\f' -> appendable.append( "\\f" ); 1171 case '\u0007' -> appendable.append( "\\a" ); 1172 case '\u001B' -> appendable.append( "\\e" ); // ESC 1173 default -> appendable.append( c ); 1174 } // TestSwitch: 1175 } // escapeRegex() 1176 1177 /** 1178 * Escapes the given character using Regex escapes. 1179 * 1180 * @param c The character to escape. 1181 * @return A {@code String} with the escaped character. 1182 * 1183 * @since 0.0.5 1184 */ 1185 @API( status = STABLE, since = "0.0.5" ) 1186 public static final String escapeRegex( final char c ) 1187 { 1188 final var retValue = new StringBuilder(); 1189 try 1190 { 1191 escapeRegex( retValue, c ); 1192 } 1193 catch( final IOException e ) 1194 { 1195 /* 1196 * We append to a StringBuilder, and StringBuilder.append() does 1197 * not define an IOException. 1198 */ 1199 throw new ImpossibleExceptionError( e ); 1200 } 1201 1202 //---* Done *---------------------------------------------------------- 1203 return retValue.toString(); 1204 } // escapeRegex() 1205 1206 /** 1207 * Escapes the characters in a {@code String} using Regex escapes. 1208 * 1209 * @param input The {@code String} to escape, may be {@code null}. 1210 * @return A new escaped {@code String}, or {@code null} if the argument 1211 * was already {@code null}. 
1212 * 1213 * @since 0.0.5 1214 */ 1215 @API( status = STABLE, since = "0.0.5" ) 1216 public static final String escapeRegex( final CharSequence input ) 1217 { 1218 String retValue = null; 1219 if( nonNull( input ) ) 1220 { 1221 final var len = input.length(); 1222 if( len > 0 ) 1223 { 1224 final var buffer = new StringBuilder( (len * 12) / 10 ); 1225 try 1226 { 1227 escapeRegex( buffer, input ); 1228 } 1229 catch( final IOException e ) 1230 { 1231 /* 1232 * We append to a StringBuilder, and StringBuilder.append() does 1233 * not define an IOException. 1234 */ 1235 throw new ImpossibleExceptionError( e ); 1236 } 1237 retValue = buffer.toString(); 1238 } 1239 else 1240 { 1241 retValue = EMPTY_STRING; 1242 } 1243 } 1244 1245 //---* Done *---------------------------------------------------------- 1246 return retValue; 1247 } // escapeRegex() 1248 1249 /** 1250 * Escapes the characters in a {@code String} using Regex escapes and 1251 * writes them to a 1252 * {@link Appendable}. 1253 * 1254 * @param appendable The appendable receiving the escaped string. 1255 * @param input The {@code String} to escape. If {@code null} or the empty 1256 * String, nothing will be put to the appendable. 1257 * @throws NullArgumentException The appendable is {@code null}. 1258 * @throws IOException when {@code Appendable} passed throws the exception 1259 * from calls to the 1260 * {@link Appendable#append(CharSequence)} 1261 * method. 
1262 * 1263 * @since 0.0.5 1264 */ 1265 @API( status = STABLE, since = "0.0.5" ) 1266 public static final void escapeRegex( final Appendable appendable, final CharSequence input ) throws IOException 1267 { 1268 requireNonNullArgument( appendable, "appendable" ); 1269 1270 if( isNotEmpty( input ) ) 1271 { 1272 ScanLoop: for( var i = 0; i < input.length(); ++i ) 1273 { 1274 escapeRegex( appendable, input.charAt( i ) ); 1275 } // ScanLoop: 1276 } 1277 } // escapeRegex() 1278 1279 /** 1280 * <p>{@summary Escapes the characters in a {@code String} using XML 1281 * entities.}</p> 1282 * <p>For example:</p> 1283 * <p>{@code "bread" & "butter"}</p> 1284 * <p>becomes:</p> 1285 * <p><code>&quot;bread&quot; &amp; 1286 * &quot;butter&quot;</code>.</p> 1287 * 1288 * @param input The {@code String} to escape, may be null. 1289 * @return A new escaped {@code String}, or {@code null} if the 1290 * argument was already {@code null}. 1291 * 1292 * @see #unescapeXML(CharSequence) 1293 */ 1294 @API( status = STABLE, since = "0.0.5" ) 1295 public static final String escapeXML( final CharSequence input ) 1296 { 1297 final var retValue = nonNull( input ) ? XML.escape( input ) : null; 1298 1299 //---* Done *---------------------------------------------------------- 1300 return retValue; 1301 } // escapeXML() 1302 1303 /** 1304 * <p>{@summary Escapes the characters in a {@code String} using XML 1305 * entities and writes them to an 1306 * {@link Appendable}.}</p> 1307 * <p>For example:</p> 1308 * <p>{@code "bread" & "butter"}</p> 1309 * <p>becomes:</p> 1310 * <p><code>&quot;bread&quot; &amp; 1311 * &quot;butter&quot;</code>.</p> 1312 * 1313 * @param appendable The appendable object receiving the escaped string. 1314 * @param input The {@code String} to escape, may be {@code null}. 1315 * @throws NullArgumentException The appendable is {@code null}. 
1316 * @throws IOException when {@code Appendable} passed throws the exception 1317 * from calls to the 1318 * {@link Appendable#append(char)} 1319 * method. 1320 * 1321 * @see #escapeXML(CharSequence) 1322 * @see #unescapeXML(CharSequence) 1323 * 1324 * @since 0.0.5 1325 */ 1326 @API( status = STABLE, since = "0.0.5" ) 1327 public static final void escapeXML( final Appendable appendable, final CharSequence input ) throws IOException 1328 { 1329 requireNonNullArgument( appendable, "appendable" ); 1330 1331 if( nonNull( input ) ) XML.escape( appendable, input ); 1332 } // escapeXML() 1333 1334 /** 1335 * Tests if the given String is {@code null} or the empty String. 1336 * 1337 * @param input The String to test. 1338 * @return {@code true} if the given String reference is 1339 * {@code null} or the empty String. 1340 * 1341 * @since 0.0.5 1342 */ 1343 @API( status = STABLE, since = "0.0.5" ) 1344 public static final boolean isEmpty( final CharSequence input ) { return isNull( input ) || input.isEmpty(); } 1345 1346 /** 1347 * Tests if the given String is {@code null}, the empty String, or just 1348 * containing whitespace. 1349 * 1350 * @param input The String to test. 1351 * @return {@code true} if the given String reference is not 1352 * {@code null} and not the empty String. 1353 * 1354 * @see String#isBlank() 1355 * 1356 * @since 0.0.5 1357 */ 1358 @API( status = STABLE, since = "0.0.5" ) 1359 public static final boolean isEmptyOrBlank( final CharSequence input ) 1360 { 1361 final var retValue = isNull( input ) || input.toString().isBlank(); 1362 1363 //---* Done *---------------------------------------------------------- 1364 return retValue; 1365 } // isEmptyOrBlank() 1366 1367 /** 1368 * Tests if the given String is not {@code null} and not the empty 1369 * String. 1370 * 1371 * @param input The String to test. 1372 * @return {@code true} if the given String reference is not 1373 * {@code null} and not the empty String. 
1374 * 1375 * @since 0.0.5 1376 */ 1377 @API( status = STABLE, since = "0.0.5" ) 1378 public static final boolean isNotEmpty( final CharSequence input ) { return nonNull( input ) && !input.isEmpty(); } 1379 1380 /** 1381 * Tests if the given String is not {@code null}, not the empty String, 1382 * and that it contains other characters than just whitespace. 1383 * 1384 * @param input The String to test. 1385 * @return {@code true} if the given String reference is not 1386 * {@code null} and not the empty String, and it contains other 1387 * characters than just whitespace. 1388 * 1389 * @see String#isBlank() 1390 * 1391 * @since 0.0.5 1392 */ 1393 @API( status = STABLE, since = "0.0.5" ) 1394 public static final boolean isNotEmptyOrBlank( final CharSequence input ) 1395 { 1396 final var retValue = nonNull( input ) && !input.toString().isBlank(); 1397 1398 //---* Done *---------------------------------------------------------- 1399 return retValue; 1400 } // isNotEmptyOrBlank() 1401 1402 /** 1403 * <p>{@summary Returns the given replacement value if the given String is 1404 * {@code null} or empty.} Otherwise the original String is returned.</p> 1405 * 1406 * @param input The String to test. 1407 * @param replacement The replacement; can be {@code null}. 1408 * @return Either the {@code input} or the {@code replacment} in case the 1409 * input is {@code null} or empty. 1410 * 1411 * @see #isEmpty(CharSequence) 1412 * @see Objects#mapFromNull(Object,Object) 1413 * 1414 * @since 0.25.2 1415 */ 1416 @API( status = STABLE, since = "0.25.2" ) 1417 public static final CharSequence mapFromEmpty( final CharSequence input, final CharSequence replacement ) 1418 { 1419 final var retValue = isNull( input ) || input.isEmpty() ? 
replacement : input; 1420 1421 //---* Done *---------------------------------------------------------- 1422 return retValue; 1423 } // mapFromEmpty() 1424 1425 /** 1426 * <p>{@summary Returns the replacement provided by the given supplier if 1427 * the given String is {@code null} or empty.} Otherwise the original 1428 * String is returned.</p> 1429 * 1430 * @param input The String to test. 1431 * @param replacementSupplier Provides a replacement for the empty input. 1432 * @return Either the {@code input} or the replacement value in case the 1433 * input is {@code null} or empty. 1434 * 1435 * @see #isEmpty(CharSequence) 1436 * @see Objects#mapFromNull(Object, Supplier) 1437 * 1438 * @since 0.25.2 1439 */ 1440 @API( status = STABLE, since = "0.25.2" ) 1441 public static final CharSequence mapFromEmpty( final CharSequence input, final Supplier<? extends CharSequence> replacementSupplier ) 1442 { 1443 requireNonNullArgument( replacementSupplier, "replacementSupplier" ); 1444 final var retValue = isNull( input ) || input.isEmpty() ? replacementSupplier.get() : input; 1445 1446 //---* Done *---------------------------------------------------------- 1447 return retValue; 1448 } // mapFromEmpty() 1449 1450 /** 1451 * Determines the maximum length over all Strings provided in the given 1452 * {@link Stream}. 1453 * 1454 * @param stream The strings. 1455 * @return The length of the longest string in the list; -1 if all values 1456 * in the given {@code stream} are {@code null}, and 1457 * {@link Integer#MIN_VALUE} 1458 * if the given {@code stream} is empty. 1459 * 1460 * @since 0.0.5 1461 */ 1462 @API( status = STABLE, since = "0.0.5" ) 1463 public static final int maxContentLength( final Stream<? extends CharSequence> stream ) 1464 { 1465 final var retValue = requireNonNullArgument( stream, "stream" ) 1466 .mapToInt( string -> nonNull( string ) ? 
string.length() : -1 ) 1467 .max() 1468 .orElse( Integer.MIN_VALUE ); 1469 1470 //---* Done *---------------------------------------------------------- 1471 return retValue; 1472 } // maxContentLength() 1473 1474 /** 1475 * Determines the maximum length over all strings provided in the given 1476 * {@link Collection}. 1477 * 1478 * @param list The strings. 1479 * @return The length of the longest string in the list; -1 if all values 1480 * in the given {@code list} are {@code null}, and 1481 * {@link Integer#MIN_VALUE} 1482 * if the given {@code list} is empty. 1483 * 1484 * @since 0.0.5 1485 */ 1486 @API( status = STABLE, since = "0.0.5" ) 1487 public static final int maxContentLength( final Collection<? extends CharSequence> list ) 1488 { 1489 final var retValue = maxContentLength( requireNonNullArgument( list, "list" ).stream() ); 1490 1491 //---* Done *---------------------------------------------------------- 1492 return retValue; 1493 } // maxContentLength() 1494 1495 /** 1496 * Determines the maximum length over all strings provided in the given 1497 * array. 1498 * 1499 * @param a The strings. 1500 * @return The length of the longest string in the list; -1 if all values 1501 * in the array are {@code null}, and 1502 * {@link Integer#MIN_VALUE} 1503 * if the given array has zero length. 1504 * 1505 * @since 0.0.5 1506 */ 1507 @API( status = STABLE, since = "0.0.5" ) 1508 public static final int maxContentLength( final CharSequence... a ) 1509 { 1510 final var retValue = maxContentLength( Arrays.stream( requireNonNullArgument( a, "a" ) ) ); 1511 1512 //---* Done *---------------------------------------------------------- 1513 return retValue; 1514 } // maxContentLength() 1515 1516 /** 1517 * <p>{@summary Normalizes the given String to a pure ASCII String.} This 1518 * replaces 'ß' by 'ss' and replaces all diacritical characters by their 1519 * base form (that mean that 'ü' gets 'u' and so on). 
For the normalising 1520 * of a search criteria, this should be sufficient, although it may cause 1521 * issues for non-latin scripts, as for these the input can be mapped to 1522 * the empty String. 1523 * 1524 * @note The Scandinavian letters 'ø' and 'Ø' are not diacritical 1525 * letters, nevertheless they will be replaced. 1526 * 1527 * @param input The input string. 1528 * @return The normalised String, only containing ASCII characters; it 1529 * could be empty. 1530 * 1531 * TODO Check the implementation and the results!! 2022-12-10 1532 */ 1533 public static final String normalizeToASCII( final CharSequence input ) 1534 { 1535 final var str = requireNonNullArgument( input, "s" ).toString() 1536 .replace( "ß", "ss" ) 1537 .replace( 'ø', 'o' ) 1538 .replace( 'Ø', 'O' ); 1539 final var retValue = normalize( str, NFD ) 1540 .replaceAll( "[^\\p{ASCII}]", EMPTY_STRING ); 1541 1542 //---* Done *---------------------------------------------------------- 1543 return retValue; 1544 } // normalizeToASCII() 1545 1546 /** 1547 * Brings the given string to the given length and uses the provided 1548 * padding character to fill up the string. 1549 * 1550 * @param input The string to format. 1551 * @param length The desired length; if 0 or less, the given string is 1552 * returned, regardless of {@code clip}. 1553 * @param c The pad character. 1554 * @param mode The 1555 * {@linkplain StringUtils.Padding pad mode}. 1556 * @param clip {@code true} if the input string should be cut in case 1557 * it is longer than {@code length}, {@code false} if it has to be 1558 * returned unchanged . 1559 * @return The re-formatted string. 1560 * 1561 * @since 0.0.5 1562 */ 1563 @API( status = STABLE, since = "0.0.5" ) 1564 public static final String pad( final CharSequence input, final int length, final char c, final Padding mode, final boolean clip ) 1565 { 1566 return pad( input, length, c, mode, clip ? 
CLIPPING_CUT : CLIPPING_NONE ); 1567 } // pad() 1568 1569 /** 1570 * Brings the given string to the given length and uses the provided 1571 * padding character to fill up the string. 1572 * 1573 * @param input The string to format. 1574 * @param length The desired length; if 0 or less, the given string is 1575 * returned, regardless of {@code clip}. 1576 * @param c The pad character. 1577 * @param mode The 1578 * {@linkplain StringUtils.Padding pad mode}. 1579 * @param clip The 1580 * {@linkplain StringUtils.Clipping clipping mode}. 1581 * @return The re-formatted string. 1582 * 1583 * @since 0.0.5 1584 */ 1585 @API( status = STABLE, since = "0.0.5" ) 1586 public static final String pad( final CharSequence input, final int length, final char c, final Padding mode, final Clipping clip ) 1587 { 1588 //noinspection OverlyComplexBooleanExpression 1589 if( ((requireNonNullArgument( clip, "clip" ) == CLIPPING_ABBREVIATE) && (length < 4)) || ((clip == CLIPPING_ABBREVIATE_MIDDLE) && (length < 5)) ) 1590 { 1591 throw new ValidationException( "Length %d is too short for clipping mode %s".formatted( length, clip.toString() ) ); 1592 } 1593 requireNonNullArgument( mode, "mode" ); 1594 1595 final String retValue; 1596 final var currentLength = requireNonNullArgument( input, "input" ).length(); 1597 1598 if( (length > 0) && (length != currentLength) ) 1599 { 1600 if( currentLength > length ) 1601 { 1602 retValue = clip.clip( input, length ); 1603 } 1604 else 1605 { 1606 final var padSize = length - currentLength; 1607 retValue = mode.pad( input, padSize, c ); 1608 } 1609 } 1610 else 1611 { 1612 retValue = input.toString(); 1613 } 1614 1615 //---* Done *---------------------------------------------------------- 1616 return retValue; 1617 } // pad() 1618 1619 /** 1620 * <p>{@summary Fills up the given string to the given length by adding 1621 * blanks on both sides; will abbreviate the string if it is longer than 1622 * the given length.} The minimum length is 5.</p> 1623 * 
<p>This is a shortcut to a call to 1624 * {@link #pad(CharSequence,int,char,Padding,Clipping) pad( input, length, ' ', PADDING_CENTER, CLIPPING_ABBREVIATE_MIDDLE ) }.</p> 1625 * 1626 * @param input The string to format. 1627 * @param length The desired length; minimum value is 5. 1628 * @return The re-formatted string. 1629 * 1630 * @see Padding#PADDING_CENTER 1631 * @see Clipping#CLIPPING_ABBREVIATE_MIDDLE 1632 * 1633 * @since 0.0.5 1634 */ 1635 @API( status = STABLE, since = "0.0.5" ) 1636 public static final String padCenter( final CharSequence input, final int length ) { return pad( input, length, ' ', PADDING_CENTER, CLIPPING_ABBREVIATE_MIDDLE ); } 1637 1638 /** 1639 * <p>{@summary Fills up the given string to the given length by adding 1640 * blanks on the left side; will abbreviate the string if it is longer 1641 * than the given length.} The minimum length is 4.</p> 1642 * <p>This is a shortcut to a call to 1643 * {@link #pad(CharSequence,int,char,Padding,Clipping) pad( input, length, ' ', PADDING_LEFT, CLIPPING_ABBREVIATE ) }.</p> 1644 * 1645 * @param input The string to format. 1646 * @param length The desired length; the minimum value is 4. 1647 * @return The re-formatted string. 1648 * 1649 * @see Padding#PADDING_LEFT 1650 * @see Clipping#CLIPPING_ABBREVIATE 1651 * 1652 * @since 0.0.5 1653 */ 1654 @API( status = STABLE, since = "0.0.5" ) 1655 public static final String padLeft( final CharSequence input, final int length ) { return pad( input, length, ' ', PADDING_LEFT, CLIPPING_ABBREVIATE ); } 1656 1657 /** 1658 * <p>{@summary Fills up the given string to the given length by adding 1659 * blanks on the right side; will abbreviate the string if it is longer 1660 * than the given length.} The minimum length is 4.</p> 1661 * <p>This is a shortcut to a call to 1662 * {@link #pad(CharSequence,int,char,Padding,Clipping) pad( input, length, ' ', PADDING_RIGHT, CLIPPING_ABBREVIATE ) }.</p> 1663 * 1664 * @param input The string to format. 
1665 * @param length The desired length; the minimum value is 4. 1666 * @return The re-formatted string. 1667 * 1668 * @see Padding#PADDING_RIGHT 1669 * @see Clipping#CLIPPING_ABBREVIATE 1670 * 1671 * @since 0.0.5 1672 */ 1673 @API( status = STABLE, since = "0.0.5" ) 1674 public static final String padRight( final CharSequence input, final int length ) { return pad( input, length, ' ', PADDING_RIGHT, CLIPPING_ABBREVIATE ); } 1675 1676 /** 1677 * <p>{@summary Surrounds the given String with double-quotes 1678 * (", &#34;).}</p> 1679 * <p>When the double-quote is needed in a String constant, it has to be 1680 * escaped with a backslash:</p> 1681 * <pre><code>"\"…\""</code></pre> 1682 * <p>Sometimes, this is just ugly, and there this method comes into 1683 * play.</p> 1684 * 1685 * @param input The String to surround; can be {@code null}. 1686 * @return The quoted String; will be {@code null} if the argument was 1687 * {@code null} already. 1688 */ 1689 public static final String quote( final CharSequence input ) 1690 { 1691 final var retValue = isNull( input ) ? null : String.format( "\"%s\"", input ); 1692 1693 //---* Done *---------------------------------------------------------- 1694 return retValue; 1695 } // quote() 1696 1697 /** 1698 * <p>{@summary This method replaces all diacritical characters in the 1699 * input String by their base form.} That means that 'ü' gets 'u', `È' 1700 * gets 'E' and so on.</p> 1701 * <p>This differs from 1702 * {@link #normalizeToASCII(CharSequence)} 1703 * as this method still allows non-ASCII characters in the output.</p> 1704 * 1705 * @note The Scandinavian letters 'ø' and 'Ø' are not diacritical 1706 * letters, meaning they will not be replaced. 1707 * 1708 * @param input The input string. 1709 * @return The normalised String, not containing any diacritical 1710 * characters. 1711 * 1712 * TODO Check the implementation and the results!! 
2022-12-10 1713 */ 1714 public static final String removeDiacriticalMarks( final CharSequence input ) 1715 { 1716 final var retValue = normalize( requireNonNullArgument( input, "input" ), NFD ) 1717 .replaceAll("\\p{InCombiningDiacriticalMarks}+", EMPTY_STRING ); 1718 1719 //---* Done *---------------------------------------------------------- 1720 return retValue; 1721 } // removeDiacriticalMarks() 1722 1723 /** 1724 * Repeats the given char {@code repeat} to form a new String. The table 1725 * below shows the various result for some argument combinations.<br> 1726 * <br><code> 1727 * StringUtils.repeat( 'a', 0 ) ⇒ ""<br> 1728 * StringUtils.repeat( 'a', 3 ) ⇒ "aaa"<br> 1729 * StringUtils.repeat( 'a', -2 ) ⇒ ""<br> 1730 * </code> 1731 * 1732 * @param c The character to repeat. 1733 * @param count The number of times to repeat {@code c}; a negative 1734 * value will be treated as zero. 1735 * @return A new String consisting of the given character repeated 1736 * {@code count} times, or the empty String if {@code count} was 0 1737 * or negative. 1738 * 1739 * @see String#repeat(int) 1740 * 1741 * @since 0.0.5 1742 */ 1743 @API( status = STABLE, since = "0.0.5" ) 1744 public static final String repeat( final char c, final int count ) 1745 { 1746 final var retValue = ( count > 0 ? Character.toString( c ).repeat( count ) : EMPTY_STRING).intern(); 1747 1748 //---* Done *---------------------------------------------------------- 1749 return retValue; 1750 } // repeat() 1751 1752 /** 1753 * Repeats the given char {@code repeat}, identified by its code point, to 1754 * form a new String. The 1755 * table below shows the various result for some argument 1756 * combinations.<br> 1757 * <br><code> 1758 * StringUtils.repeat( 'a', 0 ) ⇒ ""<br> 1759 * StringUtils.repeat( 'a', 3 ) ⇒ "aaa"<br> 1760 * StringUtils.repeat( 'a', -2 ) ⇒ ""<br> 1761 * </code> 1762 * 1763 * @param codePoint The character to repeat. 
1764 * @param count The number of times to repeat {@code c}; a negative 1765 * value will be treated as zero. 1766 * @return A new String consisting of the given character repeated 1767 * {@code count} times, or the empty String if {@code count} was 0 1768 * or negative, or {@code null} if the code point is invalid. 1769 * 1770 * @see Character#isValidCodePoint(int) 1771 * @see String#repeat(int) 1772 * 1773 * @since 0.0.5 1774 */ 1775 @API( status = STABLE, since = "0.0.5" ) 1776 public static final String repeat( final int codePoint, final int count ) 1777 { 1778 final var retValue = (count > 0) 1779 ? isValidCodePoint( codePoint ) 1780 ? Character.toString( codePoint ).repeat( count ).intern() 1781 : null 1782 : EMPTY_STRING; 1783 1784 //---* Done *---------------------------------------------------------- 1785 return retValue; 1786 } // repeat() 1787 1788 /** 1789 * Repeats the given String {@code repeat} times to form a new String. The 1790 * table below shows the various result for some argument 1791 * combinations.<br> 1792 * <br><code> 1793 * StringUtils.repeat( null, 2 ) ⇒ null<br> 1794 * StringUtils.repeat( "", 0 ) ⇒ ""<br> 1795 * StringUtils.repeat( "", 2 ) ⇒ ""<br> 1796 * StringUtils.repeat( "a", 3 ) ⇒ "aaa"<br> 1797 * StringUtils.repeat( "ab", 2 ) ⇒ "abab"<br> 1798 * StringUtils.repeat( "a", -2 ) ⇒ ""<br> 1799 * </code> 1800 * 1801 * @param input The String to repeat, may be {@code null}. 1802 * @param count The number of times to repeat {@code str}; a negative 1803 * value will be treated as zero. 1804 * @return A new String consisting of the original String repeated, 1805 * {@code count} times, the empty String if {@code count} was 0 1806 * or negative, or {@code null} if the input String was 1807 * {@code null}, too. 
1808 * 1809 * @see String#repeat(int) 1810 * 1811 * @since 0.0.5 1812 */ 1813 @API( status = STABLE, since = "0.0.5" ) 1814 public static final String repeat( final CharSequence input, final int count ) 1815 { 1816 final var retValue = 1817 nonNull( input ) 1818 ? (count > 0) && !input.isEmpty() 1819 ? input.toString().repeat( count ) 1820 : EMPTY_STRING 1821 : null; 1822 1823 //---* Done *---------------------------------------------------------- 1824 return retValue; 1825 } // repeat() 1826 1827 /** 1828 * <p>{@summary Splits a String by the given separator character and 1829 * returns an array of all parts.} In case a separator character is 1830 * immediately followed by another separator character, an empty String 1831 * will be placed to the array.</p> 1832 * <p>Beginning and end of the String are treated as separators. If the 1833 * first character of the String is a separator, the returned array will 1834 * start with an empty String, as it will end with an empty String if the 1835 * last character is a separator.</p> 1836 * <p>In case the String is empty, the return value will be an array 1837 * containing just the empty String. It will not be empty.</p> 1838 * 1839 * @param input The String to split. 1840 * @param separator The separator character. 1841 * @return The parts of the String. 
1842 * 1843 * @since 0.0.5 1844 */ 1845 @API( status = STABLE, since = "0.0.5" ) 1846 public static final String [] splitString( final CharSequence input, final char separator ) 1847 { 1848 return splitString( input, (int) separator ); 1849 } // splitString() 1850 1851 /** 1852 * <p>{@summary Splits a String by the given separator character, 1853 * identified by its Unicode code point, and returns an array of all 1854 * parts.} In case a separator character is immediately followed by 1855 * another separator character, an empty String will be placed to the 1856 * array.</p> 1857 * <p>Beginning and end of the String are treated as separators, so if the 1858 * first character of the String is a separator, the returned array will 1859 * start with an empty String, as it will end with an empty String if the 1860 * last character is a separator.</p> 1861 * <p>In case the String is empty, the return value will be an array 1862 * containing just the empty String. It will not be empty.</p> 1863 * 1864 * @param input The String to split. 1865 * @param separator The code point for the separator character. 1866 * @return The parts of the String. 
1867 * 1868 * @since 0.0.5 1869 */ 1870 @API( status = STABLE, since = "0.0.5" ) 1871 public static final String [] splitString( final CharSequence input, final int separator ) 1872 { 1873 final var retValue = stream( input, separator ).toArray( String []::new ); 1874 1875 //---* Done *---------------------------------------------------------- 1876 return retValue; 1877 } // splitString() 1878 1879 /** 1880 * <p>{@summary Splits a String by the given separator sequence and 1881 * returns an array of all parts.} In case a separator sequence is 1882 * immediately followed by another separator sequence, an empty String 1883 * will be placed to the array.</p> 1884 * <p>Beginning and end of the String are treated as separators, so if the 1885 * first part of the String equals the separator sequence, the returned 1886 * array will start with an empty String, as it will end with an empty 1887 * String if the last part would equal the separator sequence.</p> 1888 * <p>In case the String is empty, the return value will be an array 1889 * containing just the empty String. It will not be empty.</p> 1890 * 1891 * @param input The String to split. 1892 * @param separator The separator sequence. 1893 * @return The parts of the String. 
1894 * 1895 * @since 0.0.5 1896 */ 1897 @API( status = STABLE, since = "0.0.5" ) 1898 public static final String [] splitString( final CharSequence input, final CharSequence separator ) 1899 { 1900 final var retValue = stream( input, separator).toArray( String []::new ); 1901 1902 //---* Done *---------------------------------------------------------- 1903 return retValue; 1904 } // splitString() 1905 1906 /** 1907 * <p>{@summary Splits a String by the given separator character and 1908 * returns an instance of 1909 * {@link Stream} 1910 * providing all parts.} In case a separator character is immediately 1911 * followed by another separator character, an empty String will be put to 1912 * the {@code Stream}.</p> 1913 * <p>Beginning and end of the String are treated as separators, so if the 1914 * first character of the String is a separator, the returned 1915 * {@code Stream} will start with an empty String, as it will end with an 1916 * empty String if the last character is a separator.</p> 1917 * <p>In case the String is empty, the return value will be a 1918 * {@code Stream} containing just the empty String. It will not be 1919 * empty.</p> 1920 * 1921 * @param input The String to split. 1922 * @param separator The separator character. 1923 * @return A {@code Stream} instance with the parts of the String. 
1924 * 1925 * @since 0.0.7 1926 */ 1927 @API( status = STABLE, since = "0.0.7" ) 1928 public static final Stream<String> stream( final CharSequence input, final char separator ) 1929 { 1930 return stream( input, (int) separator ); 1931 } // stream() 1932 1933 /** 1934 * <p>{@summary Splits a String by the given separator character, identified by its 1935 * Unicode code point, and returns a 1936 * {@link Stream} 1937 * of all parts.} In case a separator character is immediately followed by 1938 * another separator char, an empty String will be put to the 1939 * {@code Stream}.</p> 1940 * <p>Beginning and end of the String are treated as 1941 * separators, so if the first character of the String is a separator, the 1942 * returned {@code Stream} will start with an empty String, as it will end 1943 * with an empty String if the last character is a separator.</p> 1944 * <p>In case the String is empty, the return value will be a 1945 * {@code Stream} containing just the empty String. It will not be 1946 * empty.</p> 1947 * 1948 * @param input The String to split. 1949 * @param separator The code point for the separator character. 1950 * @return A {@code Stream} instance with the parts of the String. 
1951 * 1952 * @since 0.0.7 1953 */ 1954 @API( status = STABLE, since = "0.0.7" ) 1955 public static final Stream<String> stream( final CharSequence input, final int separator ) 1956 { 1957 //---* Process the string *-------------------------------------------- 1958 final var codepoints = requireNonNullArgument( input, "input" ).codePoints().toArray(); 1959 final var builder = Stream.<String>builder(); 1960 var begin = -1; 1961 for( var i = 0 ; i < codepoints.length; ++i ) 1962 { 1963 if( begin == -1 ) 1964 { 1965 begin = i; 1966 } 1967 if( codepoints [i] == separator ) 1968 { 1969 builder.add( new String( codepoints, begin, i - begin ).intern() ); 1970 begin = -1; 1971 } 1972 } 1973 1974 //---* Add the rest *-------------------------------------------------- 1975 if( begin >= 0 ) 1976 { 1977 builder.add( new String( codepoints, begin, codepoints.length - begin ).intern() ); 1978 } 1979 if( (codepoints.length == 0) || (codepoints [codepoints.length - 1] == separator) ) 1980 { 1981 builder.add( EMPTY_STRING ); 1982 } 1983 1984 //---* Create the return value *--------------------------------------- 1985 final var retValue = builder.build(); 1986 1987 //---* Done *---------------------------------------------------------- 1988 return retValue; 1989 } // stream() 1990 1991 /** 1992 * <p>{@summary Splits a String by the given separator sequence and 1993 * returns an instance of 1994 * {@link Stream} 1995 * containing all parts.} In case a separator sequence is immediately 1996 * followed by another separator sequence, an empty String will be put to 1997 * the {@code Stream}.</p> 1998 * <p>Beginning and end of the String are treated as separators, so if the 1999 * first part of the String equals the separator sequence, the returned 2000 * {@code Stream} will start with an empty string, as it will end with an 2001 * empty String if the last part would equal the separator sequence.</p> 2002 * <p>In case the String is empty, the return value will be a 2003 * {@code Stream} 
containing just the empty String. It will not be 2004 * empty.</p> 2005 * 2006 * @param input The String to split. 2007 * @param separator The separator sequence. 2008 * @return The parts of the String. 2009 * 2010 * @since 0.0.7 2011 */ 2012 @API( status = STABLE, since = "0.0.7" ) 2013 public static final Stream<String> stream( final CharSequence input, final CharSequence separator ) 2014 { 2015 //---* Process the string *-------------------------------------------- 2016 var s = requireNonNullArgument( input, "input" ).toString(); 2017 final var t = requireNotEmptyArgument( separator, "separator" ).toString(); 2018 2019 final var builder = Stream.<String>builder(); 2020 var pos = Integer.MAX_VALUE; 2021 while( isNotEmpty( s ) && (pos >= 0) ) 2022 { 2023 pos = s.indexOf( t ); 2024 switch( Integer.signum( pos ) ) 2025 { 2026 case 0 -> /* String starts with separator */ 2027 { 2028 builder.add( EMPTY_STRING ); 2029 s = s.substring( t.length() ); 2030 } 2031 case 1 -> /* String contains a separator somewhere */ 2032 { 2033 builder.add( s.substring( 0, pos ) ); 2034 s = s.substring( pos + t.length() ); 2035 } 2036 default -> { /* Just leave the loop */ } 2037 } // ResultHandlerSwitch: 2038 } 2039 2040 //---* Add the rest *-------------------------------------------------- 2041 builder.add( s ); 2042 2043 //---* Create the return value *--------------------------------------- 2044 final var retValue = builder.build(); 2045 2046 //---* Done *---------------------------------------------------------- 2047 return retValue; 2048 } // stream() 2049 2050 /** 2051 * <p>{@summary Splits a String using the given regular expression and 2052 * returns an instance of 2053 * {@link Stream} 2054 * providing all parts.} In case a separator sequence is immediately 2055 * followed by another separator sequence, an empty String will be put to 2056 * the {@code Stream}.</p> 2057 * <p>Beginning and end of the String are treated as separators, so if the 2058 * first part of the String 
equals the separator sequence, the returned 2059 * {@code Stream} will start with an empty string, as it will end with an 2060 * empty String if the last part would equal the separator sequence.</p> 2061 * <p>In case the String is empty, the return value will be a 2062 * {@code Stream} containing just the empty String. It will not be 2063 * empty.</p> 2064 * 2065 * @note This method behaves different from 2066 * {@link String#split(String)} 2067 * as it will return trailing empty Strings. 2068 * 2069 * @param input The String to split. 2070 * @param pattern The separator sequence. 2071 * @return The parts of the String. 2072 * 2073 * @see String#split(String) 2074 * @see Pattern#split(CharSequence) 2075 * 2076 * @since 0.0.7 2077 */ 2078 @API( status = STABLE, since = "0.0.7" ) 2079 public static final Stream<String> stream( final CharSequence input, final Pattern pattern ) 2080 { 2081 requireNonNullArgument( pattern, "pattern" ); 2082 2083 //---* Process the string *-------------------------------------------- 2084 final var builder = Stream.<String>builder(); 2085 if( isEmpty( requireNonNullArgument( input, "s" ) ) ) 2086 { 2087 builder.add( EMPTY_STRING ); 2088 } 2089 else 2090 { 2091 final var parts = pattern.split( input ); 2092 for( final var part : parts ) 2093 { 2094 builder.add( part ); 2095 } 2096 final var matcher = pattern.matcher( input ); 2097 var count = 0; 2098 while( matcher.find() ) ++count; 2099 //noinspection ForLoopWithMissingComponent 2100 for( ; count >= parts.length; --count ) 2101 { 2102 builder.add( EMPTY_STRING ); 2103 } 2104 } 2105 2106 //---* Create the return value *--------------------------------------- 2107 final var retValue = builder.build(); 2108 2109 //---* Done *---------------------------------------------------------- 2110 return retValue; 2111 } // stream() 2112 2113 /** 2114 * <p>{@summary Strips HTML or XML tags from the given String, without 2115 * touching other entities (like {@code &} or {@code }).} The 2116 * result 
would be the effective text, stripped from all other whitespace 2117 * (except single blanks).</p> 2118 * <p>This means that the result for</p> 2119 * <div class="source-container"><pre>stripTags( """ 2120 * <html> 2121 * <head> 2122 * … 2123 * </head> 2124 * <body> 2125 * <a href='…'> Simple <br> 2126 * <br> Text </a> 2127 * </body> 2128 * </html>""" )</pre></div> would be just 2129 * <p>"{@code Simple Text}".</p> 2130 * <p>Comments will be stripped as well, and {@code <pre>} tags are not 2131 * interpreted, with the consequence that any formatting with whitespace 2132 * gets lost. {@code CDATA} elements are stripped, too.</p> 2133 * 2134 * @param input The HTML/XML string. 2135 * @return The string without the tags. 2136 * 2137 * @since 0.0.7 2138 */ 2139 @API( status = STABLE, since = "0.0.5" ) 2140 public static final String stripTags( final CharSequence input ) 2141 { 2142 final var retValue = new StringBuilder(); 2143 if( isNotEmptyOrBlank( requireNonNullArgument( input, "input" ) ) ) 2144 { 2145 final var matcher = m_TagRemovalPattern.matcher( input ); 2146 final var buffer = matcher.replaceAll( " " ).trim().codePoints().toArray(); 2147 int lastChar = NULL_CHAR; 2148 ScanLoop: for( final var codePoint : buffer ) 2149 { 2150 if( isWhitespace( codePoint ) ) 2151 { 2152 //---* Consecutive whitespace detected *------------------- 2153 if( isWhitespace( lastChar ) ) continue ScanLoop; 2154 2155 //---* All resulting whitespace have to be blanks *-------- 2156 retValue.append( " " ); 2157 } 2158 else 2159 { 2160 //---* Write the character *------------------------------- 2161 retValue.append( toChars( codePoint ) ); 2162 } 2163 lastChar = codePoint; 2164 } // ScanLoop: 2165 } 2166 2167 //---* Done *---------------------------------------------------------- 2168 return retValue.toString(); 2169 } // stripTags() 2170 2171 /** 2172 * <p>{@summary Strips characters from the given input that are not 2173 * allowed (or should be at least avoided) for a file or folder 
name on 2174 * most or all operating systems.}</p> 2175 * <p>The following characters will be stripped:</p> 2176 * <dl> 2177 * <dt><b>:</b> (colon)</dt><dd>On Windows systems it is used to separate 2178 * the drive letter from the path and file name; on Unix-like operating 2179 * systems (including MacOS) it would be valid, but it can cause issues on 2180 * the {@code PATH} and {@code CLASSPATH} variables on these operating 2181 * systems.</dd> 2182 * <dt><b>\</b> (backslash)</dt><dd>On Windows systems it is used as the 2183 * path separator, while on Unix-like operating systems it is problematic 2184 * in other ways. For example, it is used to escape blanks in not-quoted 2185 * file or folder names.</dd> 2186 * <dt><b>/</b> (slash or forward slash)</dt><dd>The path separator on 2187 * Unix-like operating systems, but Java will use it that way on Windows 2188 * systems, too.</dd> 2189 * <dt><b>;</b> (semicolon)</dt><dd>It can cause issues on the {@code PATH} 2190 * and {@code CLASSPATH} variables on Windows.</dd> 2191 * <dt><b>*</b> (asterisk)</dt><dd>The asterisk is often used as wild card 2192 * character in shell programs to find groups of files; using it in a file 2193 * name can cause funny effects.</dd> 2194 * <dt><b>?</b> (question mark)</dt><dd>The question mark is used on 2195 * Windows as a wild card for a single character; similar to the asterisk, 2196 * it can cause funny effects when used in a file name.</dd> 2197 * <dt><b>"</b> (double quotes)</dt> 2198 * <dt><b>'</b> (single quotes)</dt><dd>Both have some potential to 2199 * confuse the various shell programs of all operating systems.</dd> 2200 * <dt><b>@</b> ('at'-sign)</dt><dd>Although it is allowed for file and 2201 * folder names, it causes issues when used in the URL for that respective 2202 * file.</dd> 2203 * <dt><b>|</b> (pipe symbol)</dt><dd>Similar to the '*' (asterisk), the 2204 * pipe-symbol has – as the name already indicates - a meaning on most 2205 * shells that would make it difficult 
to manage files that contain this
     * character in their names.</dd>
     * <dt><b>&lt;</b> (less than)</dt>
     * <dt><b>&gt;</b> (greater than)</dt><dd>Like the pipe, these two have a
     * meaning on most shells that would make it difficult to manage files
     * that contain one of these characters in their names.</dd>
     * <dt>Whitespace</dt><dd>Only blanks will remain, any other whitespace
     * characters are stripped.</dd>
     * </dl>
     * <p>Finally, the method will strip all leading and trailing blanks;
     * although blanks are usually allowed, they are confusing when not
     * surrounded by some visible characters.</p>
     * <p>Especially regarding the characters that are critical for shells
     * ('*', '?', '&quot;', '&apos;', '|', '&lt;', and '&gt;') this method is
     * over-cautious, as most shells could handle them after proper escaping
     * the offending characters or quoting the file name.</p>
     * <p>This method furthermore assumes that any other Unicode character is
     * valid for a file or folder name; unfortunately, there are filesystems
     * where this is not true.</p>
     *
     * @note This method will not take care about the length of the returned
     * String; this means the result to a call to this method may still be
     * invalid as a file or folder name because it is too long.
     *
     * @param input The input String, denoting a file or folder name -
     * <i>not</i> a full path.
     * @return The String without the characters that are invalid for a file
     * name. This value will never be {@code null} or empty.
     * @throws NullArgumentException The input is {@code null}.
     * @throws EmptyArgumentException The input is the empty String.
     * @throws ValidationException After stripping the invalid characters the
     * return value would be empty.
2237 * 2238 * @since 0.0.5 2239 */ 2240 @SuppressWarnings( "SwitchStatementWithTooManyBranches" ) 2241 @API( status = STABLE, since = "0.0.5" ) 2242 public static final String stripToFilename( final CharSequence input ) throws ValidationException 2243 { 2244 final var len = requireNotEmptyArgument( input, "input" ).length(); 2245 final var buffer = new StringBuilder( len ); 2246 ScanLoop: for( var i = 0; i < len; ++i ) 2247 { 2248 final var currentCharacter = input.charAt( i ); 2249 Selector: 2250 //noinspection SwitchStatementWithTooManyBranches,EnhancedSwitchMigration 2251 switch( currentCharacter ) 2252 { 2253 case ':': 2254 case '\\': 2255 case '/': 2256 case ';': 2257 case '*': 2258 case '"': 2259 case '\'': 2260 case '@': 2261 case '|': 2262 case '?': 2263 case '<': 2264 case '>': 2265 continue ScanLoop; 2266 2267 default: 2268 { 2269 if( (currentCharacter == ' ') || (!isISOControl( currentCharacter ) && !isWhitespace( currentCharacter )) ) 2270 { 2271 buffer.append( currentCharacter ); 2272 } 2273 break Selector; 2274 } 2275 } // Selector: 2276 } // ScanLoop: 2277 2278 final var retValue = buffer.toString().trim(); 2279 if( retValue.isEmpty() ) 2280 { 2281 throw new ValidationException( "After stripping the invalid characters from '%1$s' there do not remain enough characters for a valid file name".formatted( input.toString() ) ); 2282 } 2283 2284 //---* Done *---------------------------------------------------------- 2285 return retValue; 2286 } // stripToFilename() 2287 2288 /** 2289 * Strips HTML or XML comments from the given String. 2290 * 2291 * @param input The HTML/XML string. 2292 * @return The string without the comments. 
2293 * 2294 * @since 0.0.5 2295 */ 2296 @API( status = STABLE, since = "0.0.5" ) 2297 public static final String stripXMLComments( final CharSequence input ) 2298 { 2299 final var matcher = m_CommentRemovalPattern.matcher( requireNonNullArgument( input, "input" ) ); 2300 final var retValue = matcher.replaceAll( EMPTY_STRING ); 2301 2302 //---* Done *---------------------------------------------------------- 2303 return retValue; 2304 } // stripXMLComments() 2305 2306 /** 2307 * <p>{@summary Gets the String that is nested in between two Strings.} 2308 * Only the first match is returned.</p> 2309 * <p>A {@code null} input String returns {@code null}. A {@code null} 2310 * open/close returns {@code null} (no match). An empty ("") 2311 * open and close returns an empty string.</p> 2312 * <pre><code> 2313 * substringBetween( "wx[b]yz", "[", "]" ) = "b" 2314 * substringBetween( null, *, * ) = Optional.empty() 2315 * substringBetween( *, null, * ) = Optional.empty() 2316 * substringBetween( *, *, null ) = Optional.empty() 2317 * substringBetween( "", "", "" ) = "" 2318 * substringBetween( "", "", "]" ) = Optional.empty() 2319 * substringBetween( "", "[", "]" ) = Optional.empty() 2320 * substringBetween( "yabcz", "", "" ) = "" 2321 * substringBetween( "yabcz", "y", "z" ) = "abc" 2322 * substringBetween( "yabczyabcz", "y", "z" ) = "abc" 2323 * </code></pre> 2324 * 2325 * @inspired Apache Commons Lang 2326 * 2327 * @param input The String containing the substring, may be 2328 * {@code null}. 2329 * @param open The String before the substring, may be {@code null}. 2330 * @param close The String after the substring, may be {@code null}. 
2331 * @return An instance of 2332 * {@link Optional} 2333 * that holds the found substring; will be 2334 * {@linkplain Optional#empty() empty} if no match 2335 * 2336 * @since 0.4.8 2337 */ 2338 @API( status = STABLE, since = "0.4.8" ) 2339 public static final Optional<String> substringBetween( final String input, final String open, final String close ) 2340 { 2341 String found = null; 2342 2343 if( Stream.of( input, open, close ).allMatch( Objects::nonNull ) ) 2344 { 2345 if( open.isEmpty() && close.isEmpty() ) 2346 { 2347 found = EMPTY_STRING; 2348 } 2349 else 2350 { 2351 final var start = input.indexOf(open); 2352 if( start >= 0 ) 2353 { 2354 final var end = input.indexOf( close, start + open.length() ); 2355 if( end > 0 ) 2356 { 2357 found = input.substring( start + open.length(), end ); 2358 } 2359 } 2360 } 2361 } 2362 final var retValue = Optional.ofNullable( found ); 2363 2364 //---* Done *---------------------------------------------------------- 2365 return retValue; 2366 } // substringBetween() 2367 2368 /** 2369 * <p>{@summary Searches a String for substrings delimited by a start and 2370 * end tag, returning all matching substrings in a 2371 * {@link java.util.SequencedCollection Collection}.} That collection is 2372 * empty if no match was found.</p> 2373 * <p>No match can be found in a {@code null} input String; same for a 2374 * {@code null} or an empty ("") open or close.</p> 2375 * <pre><code> 2376 * substringsBetween( "[a][b][c]", "[", "]" ) = ["a","b","c"] 2377 * substringsBetween( null, *, * ) = [] 2378 * substringsBetween( *, null, * ) = [] 2379 * substringsBetween( *, *, null ) = [] 2380 * substringsBetween( "", "[", "]" ) = [] 2381 * </code></pre> 2382 * 2383 * @param input The String containing the substrings, may be 2384 * {@code null}. 2385 * @param open The String identifying the start of the substring, may 2386 * be {@code null}. 2387 * @param close The String identifying the end of the substring, may be 2388 * {@code null}. 
2389 * @return A 2390 * {@link SequencedCollection Collection} 2391 * with the found substrings, in the sequence they have in the input 2392 * String. The collection is mutable. 2393 * 2394 * @since 0.4.8 2395 */ 2396 @API( status = STABLE, since = "0.4.8" ) 2397 public static final SequencedCollection<String> substringsBetween( final String input, final String open, final String close) 2398 { 2399 final SequencedCollection<String> retValue = new ArrayList<>(); 2400 2401 if( Stream.of( input, open, close ).allMatch( StringUtils::isNotEmpty ) ) 2402 { 2403 final var strLen = input.length(); 2404 final var closeLen = close.length(); 2405 final var openLen = open.length(); 2406 var pos = 0; 2407 ScanLoop: while( pos < strLen - closeLen ) 2408 { 2409 var start = input.indexOf( open, pos ); 2410 if( start < 0 ) break ScanLoop; 2411 start += openLen; 2412 final var end = input.indexOf( close, start ); 2413 if (end < 0) break ScanLoop; 2414 retValue.add( input.substring( start, end ) ); 2415 pos = end + closeLen; 2416 } // ScanLoop: 2417 } 2418 2419 //---* Done *---------------------------------------------------------- 2420 return retValue; 2421 } // substringsBetween() 2422 2423 /** 2424 * Unescapes a string containing entity escapes to a string containing the 2425 * actual Unicode characters corresponding to the escapes. Supports HTML 2426 * 5.0 entities.<br> 2427 * <br>For example, the string 2428 * "&lt;Fran&ccedil;ais&gt;" will become 2429 * "<Français>".<br> 2430 * <br>If an entity is unrecognised, it is left alone, and inserted 2431 * verbatim into the result string. e.g. "&gt;&zzzz;x" 2432 * will become ">&zzzz;x". 2433 * 2434 * @param input The {@code String} to unescape, may be {@code null}. 2435 * @return A new unescaped {@code String}, {@code null} if the given 2436 * string was already {@code null}. 
2437 * 2438 * @see #escapeHTML(CharSequence) 2439 * @see #escapeHTML(Appendable,CharSequence) 2440 * 2441 * @since 0.0.5 2442 */ 2443 @API( status = STABLE, since = "0.0.5" ) 2444 public static final String unescapeHTML( final CharSequence input ) 2445 { 2446 final var retValue = nonNull( input ) ? HTML50.unescape( input ) : null; 2447 2448 //---* Done *---------------------------------------------------------- 2449 return retValue; 2450 } // unescapeHTML() 2451 2452 /** 2453 * Unescapes a string containing entity escapes to a string containing the 2454 * actual Unicode characters corresponding to the escapes and writes it to 2455 * the given 2456 * {@link Appendable}. 2457 * Supports HTML 4.0 entities.<br> 2458 * <br>For example, the string 2459 * "&lt;Fran&ccedil;ais&gt;" will become 2460 * "<Français>".<br> 2461 * <br>If an entity is unrecognised, it is left alone, and inserted 2462 * verbatim into the result string. e.g. "&gt;&zzzz;x" 2463 * will become ">&zzzz;x". 2464 * 2465 * @param appendable The appendable receiving the unescaped string. 2466 * @param input The {@code String} to unescape, may be {@code null}. 2467 * @throws NullArgumentException The appendable is {@code null}. 2468 * @throws IOException An IOException occurred. 2469 * 2470 * @see #escapeHTML(CharSequence) 2471 * 2472 * @since 0.0.5 2473 */ 2474 @API( status = STABLE, since = "0.0.5" ) 2475 public static final void unescapeHTML( final Appendable appendable, final CharSequence input ) throws IOException 2476 { 2477 requireNonNullArgument( appendable, "appendable" ); 2478 2479 if( nonNull( input ) ) HTML50.unescape( appendable, input ); 2480 } // unescapeHTML() 2481 2482 /** 2483 * <p>{@summary Unescapes an XML string containing XML entity escapes to a 2484 * string containing the actual Unicode characters corresponding to the 2485 * escapes.}</p> 2486 * <p>If an entity is unrecognised, it is left alone, and inserted 2487 * verbatim into the result string. e.g. 
"&gt;&zzzz;x" 2488 * will become ">&zzzz;x".</p> 2489 * 2490 * @param input The {@code String} to unescape, may be {@code null}. 2491 * @return A new unescaped {@code String}, {@code null} if the given 2492 * string was already {@code null}. 2493 * 2494 * @see #escapeXML(CharSequence) 2495 * @see #escapeXML(Appendable,CharSequence) 2496 * 2497 * @since 0.0.5 2498 */ 2499 @API( status = STABLE, since = "0.0.5" ) 2500 public static final String unescapeXML( final CharSequence input ) 2501 { 2502 final var retValue = nonNull( input ) ? XML.unescape( input ) : null; 2503 2504 //---* Done *---------------------------------------------------------- 2505 return retValue; 2506 } // unescapeXML() 2507 2508 /** 2509 * <p>{@summary Unescapes an XML String containing XML entity escapes to a 2510 * String containing the actual Unicode characters corresponding to the 2511 * escapes and writes it to the given 2512 * {@link Appendable}.}</p> 2513 * <p>If an entity is unrecognised, it is left alone, and inserted 2514 * verbatim into the result string. e.g. "&gt;&zzzz;x" 2515 * will become ">&zzzz;x".</p> 2516 * 2517 * @param appendable The appendable receiving the unescaped string. 2518 * @param input The {@code String} to unescape, may be {@code null}. 2519 * @throws NullArgumentException The writer is {@code null}. 2520 * @throws IOException An IOException occurred. 
2521 * 2522 * @see #escapeXML(CharSequence) 2523 * 2524 * @since 0.0.5 2525 */ 2526 @API( status = STABLE, since = "0.0.5" ) 2527 public static final void unescapeXML( final Appendable appendable, final CharSequence input ) throws IOException 2528 { 2529 requireNonNullArgument( appendable, "appendable" ); 2530 2531 if( nonNull( input ) ) XML.unescape( appendable, input ); 2532 } // unescapeXML() 2533 2534 /** 2535 * Returns the given URL encoded String in its decoded form, using the 2536 * UTF-8 character encoding.<br> 2537 * <br>Internally, this method and 2538 * {@link #urlEncode(CharSequence)} 2539 * make use of the methods from 2540 * {@link java.net.URLDecoder} 2541 * and 2542 * {@link java.net.URLEncoder}, respectively. The methods here were 2543 * introduced to simplify the handling, as first only the UTF-8 encoding 2544 * should be used - making the second argument for the methods 2545 * {@link java.net.URLDecoder#decode(String, String) decode()}/ 2546 * {@link java.net.URLEncoder#encode(String, String) encode()} 2547 * obsolete - and second, they could throw an 2548 * {@link UnsupportedEncodingException} - although this should never occur 2549 * when UTF-8 encoding is used. 2550 * 2551 * @param input The input String. 2552 * @return The decoded result. 2553 * 2554 * @see java.net.URLDecoder#decode(String, String) 2555 * 2556 * @since 0.0.5 2557 */ 2558 @API( status = STABLE, since = "0.0.5" ) 2559 public static final String urlDecode( final CharSequence input ) 2560 { 2561 final var retValue = decode( requireNonNullArgument( input, "input" ).toString(), UTF8 ); 2562 2563 //---* Done *---------------------------------------------------------- 2564 return retValue; 2565 } // urlDecode() 2566 2567 /** 2568 * Returns the given String in its URL encoded form, using the 2569 * UTF-8 character encoding. 2570 * 2571 * @param input The input String. 2572 * @return The URL encoded result. 
*
     * @see java.net.URLEncoder#encode(String, String)
     * @see #urlDecode(CharSequence)
     *
     * @since 0.0.5
     */
    @API( status = STABLE, since = "0.0.5" )
    public static final String urlEncode( final CharSequence input )
    {
        //---* Validate the argument and get it as a plain String *------------
        final var plain = requireNonNullArgument( input, "input" ).toString();

        final var retValue = encode( plain, UTF8 );

        //---* Done *----------------------------------------------------------
        return retValue;
    }   //  urlEncode()
}
//  class StringUtils

/*
 *  End of File
 */