001/* 002 * ============================================================================ 003 * Copyright © 20002-2024 by Thomas Thrien. 004 * All Rights Reserved. 005 * ============================================================================ 006 * Licensed to the public under the agreements of the GNU Lesser General Public 007 * License, version 3.0 (the "License"). You may obtain a copy of the License at 008 * 009 * http://www.gnu.org/licenses/lgpl.html 010 * 011 * Unless required by applicable law or agreed to in writing, software 012 * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT 013 * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the 014 * License for the specific language governing permissions and limitations 015 * under the License. 016 */ 017 018package org.tquadrat.foundation.util; 019 020import static java.lang.Character.charCount; 021import static java.lang.Character.isISOControl; 022import static java.lang.Character.isValidCodePoint; 023import static java.lang.Character.isWhitespace; 024import static java.lang.Character.toChars; 025import static java.lang.Character.toLowerCase; 026import static java.lang.Character.toTitleCase; 027import static java.lang.Character.toUpperCase; 028import static java.lang.Integer.min; 029import static java.net.URLDecoder.decode; 030import static java.net.URLEncoder.encode; 031import static java.text.Normalizer.Form.NFD; 032import static java.text.Normalizer.normalize; 033import static java.util.regex.Pattern.DOTALL; 034import static java.util.regex.Pattern.compile; 035import static org.apiguardian.api.API.Status.STABLE; 036import static org.tquadrat.foundation.lang.CommonConstants.CHAR_ELLIPSIS; 037import static org.tquadrat.foundation.lang.CommonConstants.EMPTY_STRING; 038import static org.tquadrat.foundation.lang.CommonConstants.NULL_CHAR; 039import static org.tquadrat.foundation.lang.CommonConstants.UTF8; 040import static org.tquadrat.foundation.lang.Objects.isNull; 
041import static org.tquadrat.foundation.lang.Objects.nonNull; 042import static org.tquadrat.foundation.lang.Objects.requireNonNullArgument; 043import static org.tquadrat.foundation.lang.Objects.requireNotEmptyArgument; 044import static org.tquadrat.foundation.util.CharSetUtils.escapeCharacter; 045import static org.tquadrat.foundation.util.StringUtils.Clipping.CLIPPING_ABBREVIATE; 046import static org.tquadrat.foundation.util.StringUtils.Clipping.CLIPPING_ABBREVIATE_MIDDLE; 047import static org.tquadrat.foundation.util.StringUtils.Clipping.CLIPPING_CUT; 048import static org.tquadrat.foundation.util.StringUtils.Clipping.CLIPPING_NONE; 049import static org.tquadrat.foundation.util.StringUtils.Padding.PADDING_CENTER; 050import static org.tquadrat.foundation.util.StringUtils.Padding.PADDING_LEFT; 051import static org.tquadrat.foundation.util.StringUtils.Padding.PADDING_RIGHT; 052import static org.tquadrat.foundation.util.internal.Entities.HTML50; 053import static org.tquadrat.foundation.util.internal.Entities.XML; 054 055import java.io.IOException; 056import java.io.UnsupportedEncodingException; 057import java.util.ArrayList; 058import java.util.Arrays; 059import java.util.Collection; 060import java.util.List; 061import java.util.Optional; 062import java.util.SequencedCollection; 063import java.util.regex.Pattern; 064import java.util.regex.PatternSyntaxException; 065import java.util.stream.Stream; 066import java.util.stream.Stream.Builder; 067 068import org.apiguardian.api.API; 069import org.tquadrat.foundation.annotation.ClassVersion; 070import org.tquadrat.foundation.annotation.UtilityClass; 071import org.tquadrat.foundation.exception.CharSequenceTooLongException; 072import org.tquadrat.foundation.exception.EmptyArgumentException; 073import org.tquadrat.foundation.exception.ImpossibleExceptionError; 074import org.tquadrat.foundation.exception.NullArgumentException; 075import org.tquadrat.foundation.exception.PrivateConstructorForStaticClassCalledError; 076import 
org.tquadrat.foundation.exception.ValidationException; 077import org.tquadrat.foundation.lang.Objects; 078 079/** 080 * Library of utility methods that are useful when dealing with Strings. <br> 081 * <br>Parts of the code were adopted from the class 082 * <code>org.apache.commons.lang.StringUtils</code> and modified to match the 083 * requirements of this project. In particular, these are the methods 084 * <ul> 085 * <li>{@link #abbreviate(CharSequence, int) abbreviate()}</li> 086 * <li>{@link #capitalize(CharSequence) capitalize()}</li> 087 * <li>{@link #escapeHTML(CharSequence) escapeHTML()} in both versions</li> 088 * <li>{@link #isEmpty(CharSequence) isEmpty()}</li> 089 * <li>{@link #isNotEmpty(CharSequence) isNotEmpty()}</li> 090 * <li>{@link #repeat(CharSequence, int) repeat()}</li> 091 * <li>{@link #unescapeHTML(CharSequence) unescapeHTML()} in both versions</li> 092 * </ul> 093 * 094 * @extauthor Thomas Thrien - thomas.thrien@tquadrat.org 095 * @version $Id: StringUtils.java 1138 2024-06-03 22:46:54Z tquadrat $ 096 * @since 0.0.3 097 * 098 * @UMLGraph.link 099 */ 100@SuppressWarnings( {"ClassWithTooManyMethods", "OverlyComplexClass"} ) 101@ClassVersion( sourceVersion = "$Id: StringUtils.java 1138 2024-06-03 22:46:54Z tquadrat $" ) 102@UtilityClass 103public final class StringUtils 104{ 105 /*---------------*\ 106 ====** Inner Classes **==================================================== 107 \*---------------*/ 108 /** 109 * The clipping mode that is used for the method 110 * {@link StringUtils#pad(CharSequence,int,char,Padding,Clipping)} 111 * 112 * @extauthor Thomas Thrien - thomas.thrien@tquadrat.org 113 * @version $Id: StringUtils.java 1138 2024-06-03 22:46:54Z tquadrat $ 114 * @since 0.0.3 115 * 116 * @UMLGraph.link 117 */ 118 @SuppressWarnings( "InnerClassTooDeeplyNested" ) 119 @ClassVersion( sourceVersion = "$Id: StringUtils.java 1138 2024-06-03 22:46:54Z tquadrat $" ) 120 @API( status = STABLE, since = "0.0.5" ) 121 public static enum Clipping 122 
{ 123 /*------------------*\ 124 ====** Enum Declaration **============================================= 125 \*------------------*/ 126 /** 127 * If an input String is already longer than the target length, it 128 * will be returned unchanged. 129 */ 130 CLIPPING_NONE 131 { 132 /** 133 * {@inheritDoc} 134 */ 135 @Override 136 protected final String clip( final CharSequence input, final int length ) { return input.toString(); } 137 }, 138 139 /** 140 * If an input String is longer than the target length, it will be 141 * just shortened to that length. 142 */ 143 CLIPPING_CUT 144 { 145 /** 146 * {@inheritDoc} 147 */ 148 @Override 149 protected final String clip( final CharSequence input, final int length ) 150 { 151 final var retValue = ( 152 input.length() > length ? input.subSequence( 0, length ) : 153 input).toString(); 154 155 //---* Done *-------------------------------------------------- 156 return retValue; 157 } // clip() 158 }, 159 160 /** 161 * If an input String is longer than the target length, it will be 162 * abbreviated to that length, by calling 163 * {@link StringUtils#abbreviate(CharSequence, int)} 164 * with that String. The minimum length for the padded String is 4. 165 */ 166 CLIPPING_ABBREVIATE 167 { 168 /** 169 * {@inheritDoc} 170 */ 171 @Override 172 protected final String clip( final CharSequence input, final int length ) { return abbreviate( input, length ); } 173 }, 174 175 /** 176 * If an input String is longer than the target length, it will be 177 * abbreviated to that length, by calling 178 * {@link StringUtils#abbreviateMiddle(CharSequence, int)} 179 * with that String. The minimum length for the padded String is 5. 
180 */ 181 CLIPPING_ABBREVIATE_MIDDLE 182 { 183 /** 184 * {@inheritDoc} 185 */ 186 @Override 187 protected final String clip( final CharSequence input, final int length ) { return abbreviateMiddle( input, length ); } 188 }; 189 190 /*---------*\ 191 ====** Methods **====================================================== 192 \*---------*/ 193 /** 194 * Clips the given input String. 195 * 196 * @param input The input String. 197 * @param length The target length. 198 * @return The result String. 199 */ 200 protected abstract String clip( final CharSequence input, final int length ); 201 } 202 // enum Clipping 203 204 /** 205 * The padding mode that is used for the methods 206 * {@link StringUtils#pad(CharSequence,int,char,Padding,boolean)} 207 * and 208 * {@link StringUtils#pad(CharSequence,int,char,Padding,Clipping)} 209 * 210 * @extauthor Thomas Thrien - thomas.thrien@tquadrat.org 211 * @version $Id: StringUtils.java 1138 2024-06-03 22:46:54Z tquadrat $ 212 * @since 0.0.5 213 * 214 * @UMLGraph.link 215 */ 216 @SuppressWarnings( "InnerClassTooDeeplyNested" ) 217 @ClassVersion( sourceVersion = "$Id: StringUtils.java 1138 2024-06-03 22:46:54Z tquadrat $" ) 218 @API( status = STABLE, since = "0.0.5" ) 219 public static enum Padding 220 { 221 /*------------------*\ 222 ====** Enum Declaration **============================================= 223 \*------------------*/ 224 /** 225 * The pad characters are distributed evenly at begin and end of the 226 * string. 
227 */ 228 PADDING_CENTER 229 { 230 /** 231 * {@inheritDoc} 232 */ 233 @Override 234 protected final String pad( final CharSequence input, final int padSize, final char c ) 235 { 236 final var rightSize = padSize / 2; 237 final var leftSize = padSize - rightSize; 238 final var retValue = padding( leftSize, c ) + input.toString() + padding( rightSize, c ); 239 240 //---* Done *-------------------------------------------------- 241 return retValue; 242 } // pad() 243 }, 244 245 /** 246 * The pad characters are added at the beginning of the string 247 * (prefixing it). 248 */ 249 PADDING_LEFT 250 { 251 /** 252 * {@inheritDoc} 253 */ 254 @Override 255 protected final String pad( final CharSequence input, final int padSize, final char c ) 256 { 257 return padding( padSize, c ) + input.toString(); 258 } // pad() 259 }, 260 261 /** 262 * The pad characters are added the end of the string (as a suffix). 263 */ 264 PADDING_RIGHT 265 { 266 /** 267 * {@inheritDoc} 268 */ 269 @Override 270 protected final String pad( final CharSequence input, final int padSize, final char c ) 271 { 272 return input.toString() + padding( padSize, c ); 273 } // pad() 274 }; 275 276 /*---------*\ 277 ====** Methods **====================================================== 278 \*---------*/ 279 /** 280 * Pads the given input String. 281 * 282 * @param input The input String. 283 * @param padSize The pad size. 284 * @param c The pad character. 285 * @return The result String. 286 */ 287 protected abstract String pad( final CharSequence input, final int padSize, final char c ); 288 289 /** 290 * <p>{@summary Returns padding using the specified pad character repeated to the 291 * given length.}</p> 292 * <br><code> 293 * Padding.padding( 0, 'e' ) ⇒ ""<br> 294 * Padding.padding( 3, 'e' ) ⇒ "eee"<br> 295 * Padding.padding( -2, 'e' ) ⇒ IndexOutOfBoundsException<br> 296 * </code> 297 * 298 * @param repeat Number of times to repeat {@code padChar}; must be 299 * 0 or greater. 
300 * @param padChar Character to repeat. 301 * @return String with repeated {@code padChar} character, or the 302 * empty String if {@code repeat} is 0. 303 * @throws IndexOutOfBoundsException {@code repeat} is less than 0. 304 * 305 * @see StringUtils#repeat(int,int) 306 */ 307 private static String padding( final int repeat, final char padChar ) throws IndexOutOfBoundsException 308 { 309 if( repeat < 0 ) throw new IndexOutOfBoundsException( MSG_PadNegative.formatted( repeat ) ); 310 311 final var retValue = Character.toString( padChar ).repeat( repeat ).intern(); 312 313 //---* Done *------------------------------------------------------ 314 return retValue; 315 } // padding() 316 } 317 // enum Padding 318 319 /*-----------*\ 320 ====** Constants **======================================================== 321 \*-----------*/ 322 /** 323 * <p>{@summary The regular expression for an HTML or XML comment: 324 * {@value}.}</p> 325 * <p>This pattern is used by the 326 * {@link #stripXMLComments(CharSequence)} 327 * method.</p> 328 * 329 * @since 0.0.5 330 */ 331 @API( status = STABLE, since = "0.0.5" ) 332 public static final String COMMENTREMOVAL_PATTERN = "<!--.+?-->"; 333 334 /** 335 * The message text indicating that the given value for the abbreviation 336 * target length is too short. 337 */ 338 private static final String MSG_AbbrTooShort = "The minimum abbreviation width is %d"; 339 340 /** 341 * The message indicating that the give size for padding is negative. 342 */ 343 private static final String MSG_PadNegative = "Cannot pad a negative amount: %d"; 344 345 /** 346 * The maximum size to which the padding constant(s) can expand: {@value}. 
/**
 * The maximum size to which the padding constant(s) can expand: {@value}.
 * <p>NOTE(review): the constant is annotated with
 * {@code @SuppressWarnings( "unused" )}, so it is apparently no longer
 * referenced &ndash; confirm before relying on it.</p>
 *
 * @see #repeat(CharSequence,int) repeat() for String
 * @see #repeat(char,int) repeat() for char
 */
@SuppressWarnings( "unused" )
private static final int PAD_LIMIT = 8192;

/**
 * The regular expression for an HTML or XML tag: {@value}.<br>
 * <br>This pattern is used by the
 * {@link #stripTags(CharSequence)}
 * method.<br>
 * <br>As HTML/XML comments may contain a &quot;greater than&quot; sign
 * ('&gt;'), comments are matched by an alternative of their own (the
 * first one), before the generic tag alternative is tried.<br>
 * <br>Just as a reminder: several sources recommend using the following
 * idiom for embedded JavaScript:<pre><code>  &lt;script&gt;
 *      &lt;!--
 *      <i>JavaScript code </i>
 *      --&gt;
 *  &lt;/script&gt;</code></pre>
 * <p>NOTE(review): {@code .+?} needs at least one character between the
 * comment delimiters, so an empty comment is only caught by the tag
 * alternative &ndash; confirm that this is intended.</p>
 *
 * @since 0.0.5
 */
@SuppressWarnings( "RegExpUnnecessaryNonCapturingGroup" )
@API( status = STABLE, since = "0.0.5" )
public static final String TAGREMOVAL_PATTERN = "(?:<!--.+?-->)|(?:<[^>]+?>)";

    /*------------------------*\
====** Static Initialisations **===========================================
    \*------------------------*/
/**
 * The compiled form of
 * {@link #COMMENTREMOVAL_PATTERN},
 * used to identify an HTML or XML comment; it is assigned by the static
 * initialiser.
 *
 * @see #stripXMLComments(CharSequence)
 * @see #COMMENTREMOVAL_PATTERN
 */
private static final Pattern m_CommentRemovalPattern;

/**
 * The compiled form of
 * {@link #TAGREMOVAL_PATTERN},
 * used to identify an HTML or XML tag; it is assigned by the static
 * initialiser.
 *
 * @see #stripTags(CharSequence)
 * @see #TAGREMOVAL_PATTERN
 */
private static final Pattern m_TagRemovalPattern;

static
{
    //---* The regex patterns *--------------------------------------------
    /*
     * Both patterns are compiled with DOTALL, so that '.' also matches
     * line terminators and a comment may span several lines. A syntax
     * error in one of the constant expressions is reported as an
     * ImpossibleExceptionError that keeps the original exception as its
     * cause.
     */
    try
    {
        m_CommentRemovalPattern = compile( COMMENTREMOVAL_PATTERN, DOTALL );
        m_TagRemovalPattern = compile( TAGREMOVAL_PATTERN, DOTALL );
    }
    catch( final PatternSyntaxException e )
    {
        throw new ImpossibleExceptionError( "The patterns are constant values that have been tested", e );
    }
}

    /*--------------*\
====** Constructors **=====================================================
    \*--------------*/
/**
 * No instance of this class is allowed; any call of this constructor ends
 * with a
 * {@link PrivateConstructorForStaticClassCalledError}.
 */
private StringUtils() { throw new PrivateConstructorForStaticClassCalledError( StringUtils.class ); }
This will turn "<i>Now is the time for all good 422 * men</i>" into "<i>Now is the time for…</i>".<br> 423 * <br>Specifically: 424 * <ul> 425 * <li>If {@code text} is less than {@code maxWidth} characters long, 426 * return it unchanged.</li> 427 * <li>Else abbreviate it to <code>(substring( text, 0, max - 1 ) + 428 * "…" )</code>.</li> 429 * <li>If {@code maxWidth} is less than 4, throw an 430 * {@link ValidationException}.</li> 431 * <li>In no case it will return a String of length greater than 432 * {@code maxWidth}.</li> 433 * </ul> 434 * Some samples:<br> 435 * <pre><code> 436 * StringUtils.abbreviate( null, * ) = null 437 * StringUtils.abbreviate( "", 4 ) = "" 438 * StringUtils.abbreviate( "abc", 4 ) = "abc" 439 * StringUtils.abbreviate( "abcd", 4 ) = "abcd;" 440 * StringUtils.abbreviate( "abcdefg", 4 ) = "abc…" 441 * StringUtils.abbreviate( "abcdefg", 7 ) = "abcdefg" 442 * StringUtils.abbreviate( "abcdefg", 8 ) = "abcdefg" 443 * StringUtils.abbreviate( "abcdefg", 3 ) = IllegalArgumentException 444 * </code></pre> 445 * 446 * @param text The String to abbreviate, can be {@code null}. 447 * @param maxWidth The maximum length of result String, must be at 448 * least 4. 449 * @return The abbreviated String, or {@code null} if the input was 450 * already {@code null}. 451 * @throws ValidationException The value for {@code maxWidth} was less 452 * than 4. 453 * 454 * @since 0.0.5 455 */ 456 @API( status = STABLE, since = "0.0.5" ) 457 public static final String abbreviate( final CharSequence text, final int maxWidth ) throws ValidationException 458 { 459 return abbreviate( text, 0, maxWidth ); 460 } // abbreviate() 461 462 /** 463 * Abbreviates a String using ellipses (Unicode HORIZONTAL ELLIPSIS, 464 * 0x2026). This will turn "<i>Now is the time for all good 465 * men</i>" into "<i>…is the time 466 * for…</i>".<br> 467 * <br>Works like 468 * {@link #abbreviate(CharSequence, int)}, 469 * but allows to specify a "left edge" offset. 
Note that this 470 * left edge is not necessarily going to be the leftmost character in the 471 * result, or the first character following the ellipses, but it will 472 * appear somewhere in the result. An offset less than 0 will be treated 473 * as 0, a value greater than {@code maxWidth} will be ignored.<br> 474 * <br>In no case will it return a String of length greater than 475 * {@code maxWidth}.<br> 476 * <br>Some samples:<br> 477 * <pre> 478 * StringUtils.abbreviate( null, *, * ) = null 479 * StringUtils.abbreviate( "", 0, 4 ) = "" 480 * StringUtils.abbreviate( "abcdefghijklmno", -1, 10 ) = "abcdefghi…" 481 * StringUtils.abbreviate( "abcdefghijklmno", 0, 10 ) = "abcdefghi…" 482 * StringUtils.abbreviate( "abcdefghijklmno", 1, 10 ) = "abcdefghi…" 483 * StringUtils.abbreviate( "abcdefghijklmno", 4, 10 ) = "…efghijkl…" 484 * StringUtils.abbreviate( "abcdefghijklmno", 5, 10 ) = "…fghijklm…" 485 * StringUtils.abbreviate( "abcdefghijklmno", 6, 10 ) = "…ghijklmno" 486 * StringUtils.abbreviate( "abcdefghijklmno", 8, 10 ) = "…ghijklmno" 487 * StringUtils.abbreviate( "abcdefghijklmno", 10, 10 ) = "…ghijklmno" 488 * StringUtils.abbreviate( "abcdefghijklmno", 12, 10 ) = "…ghijklmno" 489 * StringUtils.abbreviate( "abcdefghij", 0, 3 ) = IllegalArgumentException 490 * StringUtils.abbreviate( "abcdefghij", 5, 6 ) = IllegalArgumentException 491 * </pre> 492 * 493 * @param text The String to process, can be {@code null}. 494 * @param offset The left edge of the source String; this value will not 495 * be checked. 496 * @param maxWidth The maximum length of result String, must be at 497 * least 4. 498 * @return The abbreviated String, or {@code null} if the input was 499 * already {@code null}. 500 * @throws ValidationException The value for {@code maxWidth} was less 501 * than 4. 
502 * 503 * @since 0.0.5 504 */ 505 @API( status = STABLE, since = "0.0.5" ) 506 public static final String abbreviate( final CharSequence text, final int offset, final int maxWidth ) throws ValidationException 507 { 508 final var ellipsis = Character.toString( CHAR_ELLIPSIS ).intern(); 509 510 String retValue = null; 511 if( nonNull( text ) ) 512 { 513 if( maxWidth < 4 ) throw new ValidationException( String.format( MSG_AbbrTooShort, 4 ) ); 514 515 final var len = text.length(); 516 if( len > maxWidth ) 517 { 518 var effectiveOffset = min( offset, len); 519 if( (len - effectiveOffset) < (maxWidth - 1)) 520 { 521 effectiveOffset = len - (maxWidth - 1); 522 } 523 if( effectiveOffset <= 1 ) 524 { 525 retValue = text.subSequence( 0, maxWidth - 1 ) + ellipsis; 526 } 527 else 528 { 529 if( ((effectiveOffset + maxWidth) - 1) < len ) 530 { 531 retValue = ellipsis + abbreviate( text.subSequence( effectiveOffset, len ), maxWidth - 1 ); 532 } 533 else 534 { 535 retValue = ellipsis + text.subSequence( len - (maxWidth - 1), len ); 536 } 537 } 538 } 539 else 540 { 541 retValue = text.toString(); 542 } 543 } 544 545 //---* Done *---------------------------------------------------------- 546 return retValue; 547 } // abbreviate() 548 549 /** 550 * Abbreviates a String using ellipses (Unicode HORIZONTAL ELLIPSIS, 551 * 0x2026) in the middle of the returned text. 
This will turn "<i>Now 552 * is the time for all good men</i>" into "<i>Now is … 553 * good men</i>"<br> 554 * <br>Works like 555 * {@link #abbreviate(CharSequence, int)}.<br> 556 * <br>In no case will it return a String of length greater than 557 * {@code maxWidth}.<br> 558 * <br>Some samples:<br> 559 * <pre> 560 * StringUtils.abbreviateMiddle(null, *) = null 561 * StringUtils.abbreviateMiddle("", 5) = "" 562 * StringUtils.abbreviateMiddle("abcdefgh", 5) = "ab…gh" 563 * StringUtils.abbreviateMiddle("abcdefgh", 7) = "ab…gh" 564 * StringUtils.abbreviateMiddle("abcdefgh", 8) = "abcdefgh" 565 * StringUtils.abbreviateMiddle("abcdefgh", 4) = IllegalArgumentException 566 * </pre> 567 * 568 * @param input The String to check, may be {@code null}. 569 * @param maxWidth The maximum length of result String, must be at 570 * least 5. 571 * @return The abbreviated String, or {@code null} if the input was 572 * already {@code null}. 573 * @throws ValidationException The value for {@code maxWidth} was less 574 * than 5. 
575 * 576 * @since 0.0.5 577 */ 578 @API( status = STABLE, since = "0.0.5" ) 579 public static final String abbreviateMiddle( final CharSequence input, final int maxWidth ) 580 { 581 final var ellipsis = Character.toString( CHAR_ELLIPSIS ).intern(); 582 583 String retValue = null; 584 if( nonNull( input ) ) 585 { 586 if( maxWidth < 5 ) throw new ValidationException( String.format( MSG_AbbrTooShort, 5 ) ); 587 588 final var len = input.length(); 589 if( len > maxWidth ) 590 { 591 final var suffixLength = (maxWidth - 1) / 2; 592 final var prefixLength = maxWidth - 1 - suffixLength; 593 final var suffixStart = len - suffixLength; 594 retValue = input.subSequence( 0, prefixLength ) + ellipsis + input.subSequence( suffixStart, suffixStart + suffixLength ); 595 } 596 else 597 { 598 retValue = input.toString(); 599 } 600 } 601 602 //---* Done *---------------------------------------------------------- 603 return retValue; 604 } // abbreviateMiddle() 605 606 /** 607 * <p>{@summary Breaks a long string into chunks of the given length.}</p> 608 * <p>This method returns an instance of 609 * {@link Stream} that can be easily converted into an array or a 610 * collection.</p> 611 * <p>To array:</p> 612 * <pre><code>breakString( <<i>string</i>>, <<i>chunk</i>> ).toArray( String []::new )</code></pre> 613 * <p>To collection (here: a 614 * {@link List}):</p> 615 * <pre><code>breakString( <<i>string</i>>, <<i>chunk</i>> ).collect( Collectors.toList() )</code></pre> 616 * 617 * @param input The string. 618 * @param chunk The chunk size. 619 * @return The chunks from the string; the last chunk could be shorter 620 * than the others. 
621 * 622 * @see Stream#toArray(java.util.function.IntFunction) 623 * @see Stream#collect(java.util.stream.Collector) 624 * @see java.util.stream.Collectors#toList() 625 * 626 * @since 0.0.5 627 */ 628 @API( status = STABLE, since = "0.0.5" ) 629 public static final Stream<String> breakString( final CharSequence input, final int chunk ) 630 { 631 if( chunk < 1 ) throw new ValidationException( "Chunk size must not be zero or a negative number: %d".formatted( chunk ) ); 632 633 final Builder<String> builder = Stream.builder(); 634 final var len = requireNonNullArgument( input, "input" ).length(); 635 var pos = 0; 636 while( (pos + chunk) < len ) 637 { 638 builder.add( input.subSequence( pos, pos + chunk ).toString() ); 639 pos += chunk; 640 } 641 if( pos < len ) builder.add( input.subSequence( pos, len ).toString() ); 642 643 final var retValue = builder.build(); 644 645 //---* Done *---------------------------------------------------------- 646 return retValue; 647 } // breakString() 648 649 /** 650 * <p>{@summary Breaks a text into lines of the given length, but 651 * different from 652 * {@link #breakString(CharSequence, int)}, 653 * it will honour whitespace.}</p> 654 * <p>This method returns an instance of 655 * {@link Stream} that can be easily converted into an array, a String, or 656 * a collection.</p> 657 * <p>To array:</p> 658 * <pre><code>breakText( <<i>text</i>>, <<i>len</i>> ).toArray( String []::new )</code></pre> 659 * <p>To String:</p> 660 * <pre><code>breakText( <<i>text</i>>, <<i>len</i>> ).collect( Collectors.joining() )</code></pre> 661 * <p>To collection (here: a 662 * {@link List}):</p> 663 * <pre><code>breakText( <<i>text</i>>, <<i>len</i>> ).collect( Collectors.toList() )</code></pre> 664 * 665 * @param text The text. 666 * @param lineLength The length of a line. 667 * @return The lines; if a word is longer than the given line length, a 668 * line containing only that word can be longer that the given line 669 * length. 
670 * 671 * @see Stream#toArray(java.util.function.IntFunction) 672 * @see Stream#collect(java.util.stream.Collector) 673 * @see java.util.stream.Collectors#joining() 674 * @see java.util.stream.Collectors#joining(CharSequence) 675 * @see java.util.stream.Collectors#joining(CharSequence, CharSequence, CharSequence) 676 * @see java.util.stream.Collectors#toList() 677 * 678 * @since 0.0.5 679 */ 680 @API( status = STABLE, since = "0.0.5" ) 681 public static final Stream<String> breakText( final CharSequence text, final int lineLength ) 682 { 683 if( lineLength < 1 ) throw new ValidationException( "Line length size must not be zero or a negative number: %d".formatted( lineLength ) ); 684 685 final Builder<String> builder = Stream.builder(); 686 687 for( final var line : splitString( requireNonNullArgument( text, "text" ), '\n' ) ) 688 { 689 if( isEmptyOrBlank( line ) ) 690 { 691 builder.add( EMPTY_STRING ); 692 } 693 else 694 { 695 final var buffer = new StringBuilder(); 696 final var chunks = line.split( "\\s" ); 697 SplitLoop: for( final var chunk : chunks ) 698 { 699 if( chunk.isEmpty() ) continue SplitLoop; 700 if( (buffer.length() + 1 + chunk.length()) < lineLength ) 701 { 702 if( isNotEmpty( buffer) ) buffer.append( ' ' ); 703 } 704 else 705 { 706 if( isNotEmpty( buffer ) ) 707 { 708 builder.add( buffer.toString() ); 709 buffer.setLength( 0 ); 710 } 711 } 712 buffer.append( chunk ); 713 } // SplitLoop: 714 if( isNotEmpty( buffer ) ) builder.add( buffer.toString() ); 715 } 716 } 717 718 final var retValue = builder.build(); 719 720 //---* Done *---------------------------------------------------------- 721 return retValue; 722 } // breakText() 723 724 /** 725 * <p>{@summary <i>Capitalises</i> a String, meaning changing the first 726 * letter to upper case as per 727 * {@link Character#toUpperCase(char)}.} No other letters are changed.</p> 728 * <p>A {@code null} input String returns {@code null}.</p> 729 * <p>Samples:</p> 730 * <pre><code> StringUtils.capitalize( 
null ) == null; 731 * StringUtils.capitalize( "" ) == ""; 732 * StringUtils.capitalize( "cat" ) == "Cat"; 733 * StringUtils.capitalize( "cAt" ) == "CAt";</code></pre> 734 * <p>Use this function to create a getter or setter name from the name of 735 * the attribute.</p> 736 * <p>This method does not recognise the 737 * {@linkplain java.util.Locale#getDefault() default locale}. 738 * This means that "istanbul" will become "Istanbul" 739 * even for the locale {@code tr_TR} (although "İstanbul" 740 * would be correct).</p> 741 * 742 * @param input The String to capitalise, can be {@code null}. 743 * @return The capitalised String, or {@code null} if the argument 744 * was already {@code null}. 745 * 746 * @see #decapitalize(CharSequence) 747 * 748 * @since 0.0.5 749 */ 750 @API( status = STABLE, since = "0.0.5" ) 751 public static final String capitalize( final CharSequence input ) 752 { 753 String retValue = null; 754 if( isNotEmpty( input ) ) 755 { 756 final var str = input.toString(); 757 final var firstCodePoint = str.codePointAt( 0 ); 758 final var newCodePoint = toUpperCase( firstCodePoint ); 759 if( firstCodePoint == newCodePoint ) 760 { 761 retValue = str; 762 } 763 else 764 { 765 final var strLen = str.length(); 766 final var newCodePoints = new int [strLen]; 767 var outOffset = 0; 768 newCodePoints [outOffset++] = newCodePoint; 769 //noinspection ForLoopWithMissingComponent 770 for( var inOffset = charCount( firstCodePoint ); inOffset < strLen; ) 771 { 772 final var codePoint = str.codePointAt( inOffset ); 773 newCodePoints [outOffset++] = codePoint; 774 inOffset += charCount( codePoint ); 775 } 776 retValue = new String( newCodePoints, 0, outOffset ); 777 } 778 } 779 else if( nonNull( input ) ) 780 { 781 retValue = EMPTY_STRING; 782 } 783 784 //---* Done *---------------------------------------------------------- 785 return retValue; 786 } // capitalize() 787 788 /** 789 * <p>{@summary <i>Capitalises</i> a String, meaning changing the first 790 * letter to 
upper case as per 791 * {@link Character#toTitleCase(char)}.} No other letters are changed.</p> 792 * <p>A {@code null} input String returns {@code null}.</p> 793 * <p>Samples:</p> 794 * <pre><code> StringUtils.capitalize( null ) == null; 795 * StringUtils.capitalize( "" ) == ""; 796 * StringUtils.capitalize( "cat" ) == "Cat"; 797 * StringUtils.capitalize( "cAt" ) == "CAt";</code></pre> 798 * <p>Use this function to create a getter or setter name from the name of 799 * the attribute.</p> 800 * <p>This method does not recognise the 801 * {@linkplain java.util.Locale#getDefault() default locale}. 802 * This means that "istanbul" will become "Istanbul" 803 * even for the locale {@code tr_TR} (although "İstanbul" 804 * would be correct).</p> 805 * 806 * @param input The String to capitalise, can be {@code null}. 807 * @return The capitalised String, or {@code null} if the argument 808 * was already {@code null}. 809 * 810 * @see #capitalize(CharSequence) 811 * @see #decapitalize(CharSequence) 812 * 813 * @since 0.4.8 814 */ 815 @API( status = STABLE, since = "0.4.8" ) 816 public static final String capitalizeToTitle( final CharSequence input ) 817 { 818 String retValue = null; 819 if( isNotEmpty( input ) ) 820 { 821 final var str = input.toString(); 822 final var firstCodePoint = str.codePointAt( 0 ); 823 final var newCodePoint = toTitleCase( firstCodePoint ); 824 if( firstCodePoint == newCodePoint ) 825 { 826 retValue = str; 827 } 828 else 829 { 830 final var strLen = str.length(); 831 final var newCodePoints = new int [strLen]; 832 var outOffset = 0; 833 newCodePoints [outOffset++] = newCodePoint; 834 //noinspection ForLoopWithMissingComponent 835 for( var inOffset = charCount( firstCodePoint ); inOffset < strLen; ) 836 { 837 final var codePoint = str.codePointAt( inOffset ); 838 newCodePoints [outOffset++] = codePoint; 839 inOffset += charCount( codePoint ); 840 } 841 retValue = new String( newCodePoints, 0, outOffset ); 842 } 843 } 844 else if( nonNull( input ) ) 
845 { 846 retValue = EMPTY_STRING; 847 } 848 849 //---* Done *---------------------------------------------------------- 850 return retValue; 851 } // capitalizeToTitle() 852 853 /** 854 * Tests if the given text is not {@code null}, not empty and not 855 * longer than the given maximum length. Use this to check whether a 856 * String that is provided as an argument to a method is longer than 857 * expected. 858 * 859 * @param name The name that should appear in the exception if one 860 * will be thrown. Usually this is the name of the argument to 861 * validate. 862 * @param text The text to check. 863 * @param maxLength The maximum length. 864 * @return Always the contents of <code>text</code> as a String; if the 865 * argument fails any of the tests, an 866 * {@link IllegalArgumentException} 867 * or an exception derived from that will be thrown. 868 * @throws CharSequenceTooLongException {@code text} is longer than 869 * {@code maxLength}. 870 * @throws EmptyArgumentException Either {@code name} or {@code text} is 871 * the empty String. 872 * @throws NullArgumentException Either {@code name} or {@code text} is 873 * {@code null}. 874 * 875 * @since 0.0.5 876 */ 877 @API( status = STABLE, since = "0.0.5" ) 878 public static final String checkTextLen( final String name, final CharSequence text, final int maxLength ) throws CharSequenceTooLongException, EmptyArgumentException, NullArgumentException 879 { 880 if( requireNotEmptyArgument( text, requireNotEmptyArgument( name, "name" ) ).length() > maxLength ) 881 { 882 throw new CharSequenceTooLongException( name, maxLength ); 883 } 884 885 //---* Done *---------------------------------------------------------- 886 return text.toString(); 887 } // checkTextLen() 888 889 /** 890 * Tests if the given text is not longer than the given maximum length; 891 * different from 892 * {@link #checkTextLen(String, CharSequence, int)}, 893 * it may be {@code null} or empty. 
894 * 895 * @param name The name that should appear in the exception if one 896 * will be thrown. 897 * @param text The text to check; may be {@code null}. 898 * @param maxLength The maximum length. 899 * @return Always the contents of {@code text} as a String, {@code null} 900 * if {@code text} was {@code null}; if the argument fails any of the 901 * tests, an 902 * {@link IllegalArgumentException} 903 * or an exception derived from that will be thrown. 904 * @throws CharSequenceTooLongException {@code text} is longer than 905 * {@code maxLength}. 906 * @throws EmptyArgumentException {@code name} is empty. 907 * @throws NullArgumentException {@code name} is {@code null}. 908 * 909 * @since 0.0.5 910 */ 911 @API( status = STABLE, since = "0.0.5" ) 912 public static final String checkTextLenNull( final String name, final CharSequence text, final int maxLength ) throws CharSequenceTooLongException 913 { 914 requireNotEmptyArgument( name, "name" ); 915 916 String retValue = null; 917 if( nonNull( text ) ) 918 { 919 if( text.length() > maxLength ) 920 { 921 throw new CharSequenceTooLongException( name, maxLength ); 922 } 923 retValue = text.toString(); 924 } 925 926 //---* Done *---------------------------------------------------------- 927 return retValue; 928 } // checkTextLenNull() 929 930 /** 931 * Changes the first letter of the given String tolower case as per 932 * {@link Character#toLowerCase(char)}. 933 * No other letters are changed. A {@code null} input String returns 934 * {@code null}.<br> 935 * <br>Samples:<pre><code> StringUtils.decapitalize( null ) = null; 936 * StringUtils.decapitalize("") = ""; 937 * StringUtils.decapitalize("Cat") = "cat"; 938 * StringUtils.decapitalize("CAT") = "cAT";</code></pre> 939 * <br>Basically, this is the complementary method to 940 * {@link #capitalize(CharSequence)}. 941 * Use this method to normalise the name of bean attributes. 942 * 943 * @param input The String to <i>decapitalise</i>, may be {@code null}. 
944 * @return The <i>decapitalised</i> String, {@code null} if the argument 945 * was {@code null}. 946 * @see #capitalize(CharSequence) 947 * 948 * @since 0.0.5 949 */ 950 @API( status = STABLE, since = "0.1.0" ) 951 public static final String decapitalize( final CharSequence input ) 952 { 953 String retValue = null; 954 if( isNotEmpty( input ) ) 955 { 956 final var str = input.toString(); 957 final var firstCodePoint = str.codePointAt( 0 ); 958 final var newCodePoint = toLowerCase( firstCodePoint ); 959 if( firstCodePoint == newCodePoint ) 960 { 961 retValue = str; 962 } 963 else 964 { 965 final var strLen = str.length(); 966 final var newCodePoints = new int [strLen]; 967 var outOffset = 0; 968 newCodePoints [outOffset++] = newCodePoint; 969 //noinspection ForLoopWithMissingComponent 970 for( var inOffset = charCount( firstCodePoint ); inOffset < strLen; ) 971 { 972 final var codePoint = str.codePointAt( inOffset ); 973 newCodePoints [outOffset++] = codePoint; 974 inOffset += charCount( codePoint ); 975 } 976 retValue = new String( newCodePoints, 0, outOffset ); 977 } 978 } 979 else if( nonNull( input ) ) 980 { 981 retValue = EMPTY_STRING; 982 } 983 984 //---* Done *---------------------------------------------------------- 985 return retValue; 986 } // decapitalize() 987 988 /** 989 * <p>{@summary Escapes the non-ASCII and special characters in a 990 * {@code String} so that the result can be used in the context of HTML.} 991 * Wherever possible, the method will return the respective HTML 5 992 * entity; only when there is no matching entity, it will use the Unicode 993 * escape.</p> 994 * <p>So if you call the method with the argument 995 * "<i>Süße</i>", it will return 996 * "<code>S&uuml;&szlig;e</code>".</p> 997 * <p>If the input will be, for example, a Chinese text like this: 998 * "<i>球体</i>" (means "Ball"), you may get back something like 999 * this: "<code>&#x7403;&#x4F53;</code>", as there are 1000 * no entities defined for (any) Chinese letters.</p> 
1001 * <p>The method supports all known HTML 5.0 entities, including 1002 * funky accents. But it will not escape several commonly used characters 1003 * like the full stop ('.'), the comma (','), the colon (':'), or the 1004 * semicolon (';'), although they will be handled properly by 1005 * {@link #unescapeHTML(CharSequence)}.</p> 1006 * <p>Note that the commonly used apostrophe escape character 1007 * (&apos;) that was not a legal entity for HTML before HTML 5 is 1008 * now supported.</p> 1009 * 1010 * @param input The {@code String} to escape, may be {@code null}. 1011 * @return A new escaped {@code String}, or {@code null} if the 1012 * argument was already {@code null}. 1013 * 1014 * @see #unescapeHTML(CharSequence) 1015 * @see <a href="http://hotwired.lycos.com/webmonkey/reference/special_characters/">ISO Entities</a> 1016 * @see <a href="http://www.w3.org/TR/REC-html32#latin1">HTML 3.2 Character Entities for ISO Latin-1</a> 1017 * @see <a href="http://www.w3.org/TR/REC-html40/sgml/entities.html">HTML 4.0 Character entity references</a> 1018 * @see <a href="http://www.w3.org/TR/html401/charset.html#h-5.3">HTML 4.01 Character References</a> 1019 * @see <a href="http://www.w3.org/TR/html401/charset.html#code-position">HTML 4.01 Code positions</a> 1020 * 1021 * @since 0.0.5 1022 */ 1023 /* 1024 * For some unknown reasons, JavaDoc will not accept the entities 球 1025 * and 体 (for '球' and '体'), therefore it was required to add the 1026 * Chinese characters directly into the comment above. 1027 */ 1028 @API( status = STABLE, since = "0.0.5" ) 1029 public static final String escapeHTML( final CharSequence input ) 1030 { 1031 final var retValue = nonNull( input ) ? HTML50.escape( input ) : null; 1032 1033 //---* Done *---------------------------------------------------------- 1034 return retValue; 1035 } // escapeHTML() 1036 1037 /** 1038 * Escapes the characters in a {@code String} using HTML entities and 1039 * writes them to an 1040 * {@link Appendable}. 
1041 * For details, refer to 1042 * {@link #escapeHTML(CharSequence)}. 1043 * 1044 * @param appendable The appendable object receiving the escaped string. 1045 * @param input The {@code String} to escape, may be {@code null}. 1046 * @throws NullArgumentException The appendable is {@code null}. 1047 * @throws IOException when {@code Appendable} passed throws the exception 1048 * from calls to the 1049 * {@link Appendable#append(char)} 1050 * method. 1051 * 1052 * @see #escapeHTML(CharSequence) 1053 * @see #unescapeHTML(CharSequence) 1054 * @see <a href="http://hotwired.lycos.com/webmonkey/reference/special_characters/">ISO Entities</a> 1055 * @see <a href="http://www.w3.org/TR/REC-html32#latin1">HTML 3.2 Character Entities for ISO Latin-1</a> 1056 * @see <a href="http://www.w3.org/TR/REC-html40/sgml/entities.html">HTML 4.0 Character entity references</a> 1057 * @see <a href="http://www.w3.org/TR/html401/charset.html#h-5.3">HTML 4.01 Character References</a> 1058 * @see <a href="http://www.w3.org/TR/html401/charset.html#code-position">HTML 4.01 Code positions</a> 1059 * 1060 * @since 0.0.5 1061 */ 1062 @API( status = STABLE, since = "0.0.5" ) 1063 public static final void escapeHTML( final Appendable appendable, final CharSequence input ) throws IOException 1064 { 1065 requireNonNullArgument( appendable, "appendable" ); 1066 1067 if( nonNull( input ) ) HTML50.escape( appendable, input ); 1068 } // escapeHTML() 1069 1070 /** 1071 * Formats the given {@code String} for the output into JSONText. This 1072 * means that the input sequence will be surrounded by double quotes, and 1073 * backslash sequences are put into all the right places.<br> 1074 * <br>< and > will be inserted as their Unicode values, allowing 1075 * JSON text to be delivered in HTML.<br> 1076 * <br>In JSON text, a string cannot contain a control character or an 1077 * unescaped quote or backslash, so these are translated to Unicode 1078 * escapes also. 
1079 * 1080 * @param input The string to escape to the JSON format; it may be 1081 * empty, but not {@code null}. 1082 * @return A string correctly formatted for insertion in a JSON text. 1083 * 1084 * @since 0.0.5 1085 */ 1086 @SuppressWarnings( "OverlyComplexMethod" ) 1087 @API( status = STABLE, since = "0.0.5" ) 1088 public static final String escapeJSON( final CharSequence input ) 1089 { 1090 var retValue = "\"\""; // The JSON empty string. 1091 final var len = requireNonNullArgument( input, "input" ).length(); 1092 if( len > 0 ) 1093 { 1094 final var buffer = new StringBuilder( len * 2 ).append( '"' ); 1095 char c; 1096 for( var i = 0; i < len; ++i ) 1097 { 1098 c = input.charAt( i ); 1099 switch( c ) 1100 { 1101 case '\\', '"', '<', '>', '&' -> buffer.append( escapeCharacter( c ) ); 1102 1103 case '\b' -> buffer.append( "\\b" ); 1104 1105 case '\t' -> buffer.append( "\\t" ); 1106 1107 case '\n'-> buffer.append( "\\n" ); 1108 1109 case '\f' -> buffer.append( "\\f" ); 1110 1111 case '\r' -> buffer.append( "\\r" ); 1112 1113 default -> 1114 { 1115 //noinspection OverlyComplexBooleanExpression,CharacterComparison,UnnecessaryUnicodeEscape 1116 if( (c < ' ') 1117 || ((c >= '\u0080') && (c < '\u00a0')) 1118 || ((c >= '\u2000') && (c < '\u2100')) ) 1119 { 1120 buffer.append( escapeCharacter( c ) ); 1121 } 1122 else 1123 { 1124 buffer.append( c ); 1125 } 1126 } 1127 } 1128 } 1129 buffer.append( '"' ); 1130 retValue = buffer.toString(); 1131 } 1132 1133 //---* Done *---------------------------------------------------------- 1134 return retValue; 1135 } // escapeJSON() 1136 1137 /** 1138 * Escapes the given character using Regex escapes and writes them to a 1139 * {@link Appendable}. 1140 * 1141 * @param appendable The appendable receiving the escaped string. 1142 * @param c The character to escape. 1143 * @throws NullArgumentException The appendable is {@code null}. 
1144 * @throws IOException when {@code Appendable} passed throws the exception 1145 * from calls to the 1146 * {@link Appendable#append(CharSequence)} 1147 * method. 1148 * 1149 * @since 0.0.5 1150 */ 1151 @SuppressWarnings( "SwitchStatementWithTooManyBranches" ) 1152 @API( status = STABLE, since = "0.0.5" ) 1153 public static final void escapeRegex( final Appendable appendable, final char c ) throws IOException 1154 { 1155 requireNonNullArgument( appendable, "appendable" ); 1156 1157 TestSwitch: switch( c ) 1158 { 1159 case '\\' -> appendable.append( "\\" ); 1160 case '[', ']', '{', '}', '(', ')', '^', '$', '&', '*', '.', '+', '|', '?' -> appendable.append( "\\" ).append( c ); 1161 case '\t' -> appendable.append( "\\t" ); 1162 case '\n' -> appendable.append( "\\n" ); 1163 case '\r' -> appendable.append( "\\r" ); 1164 case '\f' -> appendable.append( "\\f" ); 1165 case '\u0007' -> appendable.append( "\\a" ); 1166 case '\u001B' -> appendable.append( "\\e" ); // ESC 1167 default -> appendable.append( c ); 1168 } // TestSwitch: 1169 } // escapeRegex() 1170 1171 /** 1172 * Escapes the given character using Regex escapes. 1173 * 1174 * @param c The character to escape. 1175 * @return A {@code String} with the escaped character. 1176 * 1177 * @since 0.0.5 1178 */ 1179 @API( status = STABLE, since = "0.0.5" ) 1180 public static final String escapeRegex( final char c ) 1181 { 1182 final var retValue = new StringBuilder(); 1183 try 1184 { 1185 escapeRegex( retValue, c ); 1186 } 1187 catch( final IOException e ) 1188 { 1189 /* 1190 * We append to a StringBuilder, and StringBuilder.append() does 1191 * not define an IOException. 1192 */ 1193 throw new ImpossibleExceptionError( e ); 1194 } 1195 1196 //---* Done *---------------------------------------------------------- 1197 return retValue.toString(); 1198 } // escapeRegex() 1199 1200 /** 1201 * Escapes the characters in a {@code String} using Regex escapes. 
1202 * 1203 * @param input The {@code String} to escape, may be {@code null}. 1204 * @return A new escaped {@code String}, or {@code null} if the argument 1205 * was already {@code null}. 1206 * 1207 * @since 0.0.5 1208 */ 1209 @API( status = STABLE, since = "0.0.5" ) 1210 public static final String escapeRegex( final CharSequence input ) 1211 { 1212 String retValue = null; 1213 if( nonNull( input ) ) 1214 { 1215 final var len = input.length(); 1216 if( len > 0 ) 1217 { 1218 final var buffer = new StringBuilder( (len * 12) / 10 ); 1219 try 1220 { 1221 escapeRegex( buffer, input ); 1222 } 1223 catch( final IOException e ) 1224 { 1225 /* 1226 * We append to a StringBuilder, and StringBuilder.append() does 1227 * not define an IOException. 1228 */ 1229 throw new ImpossibleExceptionError( e ); 1230 } 1231 retValue = buffer.toString(); 1232 } 1233 else 1234 { 1235 retValue = EMPTY_STRING; 1236 } 1237 } 1238 1239 //---* Done *---------------------------------------------------------- 1240 return retValue; 1241 } // escapeRegex() 1242 1243 /** 1244 * Escapes the characters in a {@code String} using Regex escapes and 1245 * writes them to a 1246 * {@link Appendable}. 1247 * 1248 * @param appendable The appendable receiving the escaped string. 1249 * @param input The {@code String} to escape. If {@code null} or the empty 1250 * String, nothing will be put to the appendable. 1251 * @throws NullArgumentException The appendable is {@code null}. 1252 * @throws IOException when {@code Appendable} passed throws the exception 1253 * from calls to the 1254 * {@link Appendable#append(CharSequence)} 1255 * method. 
1256 * 1257 * @since 0.0.5 1258 */ 1259 @API( status = STABLE, since = "0.0.5" ) 1260 public static final void escapeRegex( final Appendable appendable, final CharSequence input ) throws IOException 1261 { 1262 requireNonNullArgument( appendable, "appendable" ); 1263 1264 if( isNotEmpty( input ) ) 1265 { 1266 ScanLoop: for( var i = 0; i < input.length(); ++i ) 1267 { 1268 escapeRegex( appendable, input.charAt( i ) ); 1269 } // ScanLoop: 1270 } 1271 } // escapeRegex() 1272 1273 /** 1274 * <p>{@summary Escapes the characters in a {@code String} using XML 1275 * entities.}</p> 1276 * <p>For example:</p> 1277 * <p>{@code "bread" & "butter"}</p> 1278 * <p>becomes:</p> 1279 * <p><code>&quot;bread&quot; &amp; 1280 * &quot;butter&quot;</code>.</p> 1281 * 1282 * @param input The {@code String} to escape, may be null. 1283 * @return A new escaped {@code String}, or {@code null} if the 1284 * argument was already {@code null}. 1285 * 1286 * @see #unescapeXML(CharSequence) 1287 */ 1288 @API( status = STABLE, since = "0.0.5" ) 1289 public static final String escapeXML( final CharSequence input ) 1290 { 1291 final var retValue = nonNull( input ) ? XML.escape( input ) : null; 1292 1293 //---* Done *---------------------------------------------------------- 1294 return retValue; 1295 } // escapeXML() 1296 1297 /** 1298 * <p>{@summary Escapes the characters in a {@code String} using XML 1299 * entities and writes them to an 1300 * {@link Appendable}.}</p> 1301 * <p>For example:</p> 1302 * <p>{@code "bread" & "butter"}</p> 1303 * <p>becomes:</p> 1304 * <p><code>&quot;bread&quot; &amp; 1305 * &quot;butter&quot;</code>.</p> 1306 * 1307 * @param appendable The appendable object receiving the escaped string. 1308 * @param input The {@code String} to escape, may be {@code null}. 1309 * @throws NullArgumentException The appendable is {@code null}. 
1310 * @throws IOException when {@code Appendable} passed throws the exception 1311 * from calls to the 1312 * {@link Appendable#append(char)} 1313 * method. 1314 * 1315 * @see #escapeXML(CharSequence) 1316 * @see #unescapeXML(CharSequence) 1317 * 1318 * @since 0.0.5 1319 */ 1320 @API( status = STABLE, since = "0.0.5" ) 1321 public static final void escapeXML( final Appendable appendable, final CharSequence input ) throws IOException 1322 { 1323 requireNonNullArgument( appendable, "appendable" ); 1324 1325 if( nonNull( input ) ) XML.escape( appendable, input ); 1326 } // escapeXML() 1327 1328 /** 1329 * Tests if the given String is {@code null} or the empty String. 1330 * 1331 * @param input The String to test. 1332 * @return {@code true} if the given String reference is 1333 * {@code null} or the empty String. 1334 * 1335 * @since 0.0.5 1336 */ 1337 @API( status = STABLE, since = "0.0.5" ) 1338 public static final boolean isEmpty( final CharSequence input ) { return isNull( input ) || input.isEmpty(); } 1339 1340 /** 1341 * Tests if the given String is {@code null}, the empty String, or just 1342 * containing whitespace. 1343 * 1344 * @param input The String to test. 1345 * @return {@code true} if the given String reference is not 1346 * {@code null} and not the empty String. 1347 * 1348 * @see String#isBlank() 1349 * 1350 * @since 0.0.5 1351 */ 1352 @API( status = STABLE, since = "0.0.5" ) 1353 public static final boolean isEmptyOrBlank( final CharSequence input ) 1354 { 1355 final var retValue = isNull( input ) || input.toString().isBlank(); 1356 1357 //---* Done *---------------------------------------------------------- 1358 return retValue; 1359 } // isEmptyOrBlank() 1360 1361 /** 1362 * Tests if the given String is not {@code null} and not the empty 1363 * String. 1364 * 1365 * @param input The String to test. 1366 * @return {@code true} if the given String reference is not 1367 * {@code null} and not the empty String. 
1368 * 1369 * @since 0.0.5 1370 */ 1371 @API( status = STABLE, since = "0.0.5" ) 1372 public static final boolean isNotEmpty( final CharSequence input ) { return nonNull( input ) && !input.isEmpty(); } 1373 1374 /** 1375 * Tests if the given String is not {@code null}, not the empty String, 1376 * and that it contains other characters than just whitespace. 1377 * 1378 * @param input The String to test. 1379 * @return {@code true} if the given String reference is not 1380 * {@code null} and not the empty String, and it contains other 1381 * characters than just whitespace. 1382 * 1383 * @see String#isBlank() 1384 * 1385 * @since 0.0.5 1386 */ 1387 @API( status = STABLE, since = "0.0.5" ) 1388 public static final boolean isNotEmptyOrBlank( final CharSequence input ) 1389 { 1390 final var retValue = nonNull( input ) && !input.toString().isBlank(); 1391 1392 //---* Done *---------------------------------------------------------- 1393 return retValue; 1394 } // isNotEmptyOrBlank() 1395 1396 /** 1397 * Determines the maximum length over all Strings provided in the given 1398 * {@link Stream}. 1399 * 1400 * @param stream The strings. 1401 * @return The length of the longest string in the list; -1 if all values 1402 * in the given {@code stream} are {@code null}, and 1403 * {@link Integer#MIN_VALUE} 1404 * if the given {@code stream} is empty. 1405 * 1406 * @since 0.0.5 1407 */ 1408 @API( status = STABLE, since = "0.0.5" ) 1409 public static final int maxContentLength( final Stream<? extends CharSequence> stream ) 1410 { 1411 final var retValue = requireNonNullArgument( stream, "stream" ) 1412 .mapToInt( string -> nonNull( string ) ? string.length() : -1 ) 1413 .max() 1414 .orElse( Integer.MIN_VALUE ); 1415 1416 //---* Done *---------------------------------------------------------- 1417 return retValue; 1418 } // maxContentLength() 1419 1420 /** 1421 * Determines the maximum length over all strings provided in the given 1422 * {@link Collection}. 
1423 * 1424 * @param list The strings. 1425 * @return The length of the longest string in the list; -1 if all values 1426 * in the given {@code list} are {@code null}, and 1427 * {@link Integer#MIN_VALUE} 1428 * if the given {@code list} is empty. 1429 * 1430 * @since 0.0.5 1431 */ 1432 @API( status = STABLE, since = "0.0.5" ) 1433 public static final int maxContentLength( final Collection<? extends CharSequence> list ) 1434 { 1435 final var retValue = maxContentLength( requireNonNullArgument( list, "list" ).stream() ); 1436 1437 //---* Done *---------------------------------------------------------- 1438 return retValue; 1439 } // maxContentLength() 1440 1441 /** 1442 * Determines the maximum length over all strings provided in the given 1443 * array. 1444 * 1445 * @param a The strings. 1446 * @return The length of the longest string in the list; -1 if all values 1447 * in the array are {@code null}, and 1448 * {@link Integer#MIN_VALUE} 1449 * if the given array has zero length. 1450 * 1451 * @since 0.0.5 1452 */ 1453 @API( status = STABLE, since = "0.0.5" ) 1454 public static final int maxContentLength( final CharSequence... a ) 1455 { 1456 final var retValue = maxContentLength( Arrays.stream( requireNonNullArgument( a, "a" ) ) ); 1457 1458 //---* Done *---------------------------------------------------------- 1459 return retValue; 1460 } // maxContentLength() 1461 1462 /** 1463 * <p>{@summary Normalizes the given String to a pure ASCII String.} This 1464 * replaces 'ß' by 'ss' and replaces all diacritical characters by their 1465 * base form (that mean that 'ü' gets 'u' and so on). For the normalizing 1466 * of a search criteria, this should be sufficient, although it may cause 1467 * issues for non-latin scripts, as for these the input can be mapped to 1468 * the empty String. 1469 * 1470 * @note The scandinavian letters 'ø' and 'Ø' are not diacritical 1471 * letters, nevertheless they will be replaced. 1472 * 1473 * @param input The input string. 
1474 * @return The normalised String, only containing ASCII characters; it 1475 * could be empty. 1476 * 1477 * TODO Check the implementation and the results!! 2022-12-10 1478 */ 1479 public static final String normalizeToASCII( final CharSequence input ) 1480 { 1481 final var str = requireNonNullArgument( input, "s" ).toString() 1482 .replace( "ß", "ss" ) 1483 .replace( 'ø', 'o' ) 1484 .replace( 'Ø', 'O' ); 1485 final var retValue = normalize( str, NFD ) 1486 .replaceAll( "[^\\p{ASCII}]", EMPTY_STRING ); 1487 1488 //---* Done *---------------------------------------------------------- 1489 return retValue; 1490 } // normalizeToASCII() 1491 1492 /** 1493 * Brings the given string to the given length and uses the provided 1494 * padding character to fill up the string. 1495 * 1496 * @param input The string to format. 1497 * @param length The desired length; if 0 or less, the given string is 1498 * returned, regardless of {@code clip}. 1499 * @param c The pad character. 1500 * @param mode The 1501 * {@linkplain StringUtils.Padding pad mode}. 1502 * @param clip {@code true} if the input string should be cut in case 1503 * it is longer than {@code length}, {@code false} if it has to be 1504 * returned unchanged . 1505 * @return The re-formatted string. 1506 * 1507 * @since 0.0.5 1508 */ 1509 @API( status = STABLE, since = "0.0.5" ) 1510 public static final String pad( final CharSequence input, final int length, final char c, final Padding mode, final boolean clip ) 1511 { 1512 return pad( input, length, c, mode, clip ? CLIPPING_CUT : CLIPPING_NONE ); 1513 } // pad() 1514 1515 /** 1516 * Brings the given string to the given length and uses the provided 1517 * padding character to fill up the string. 1518 * 1519 * @param input The string to format. 1520 * @param length The desired length; if 0 or less, the given string is 1521 * returned, regardless of {@code clip}. 1522 * @param c The pad character. 
1523 * @param mode The 1524 * {@linkplain StringUtils.Padding pad mode}. 1525 * @param clip The 1526 * {@linkplain StringUtils.Clipping clipping mode}. 1527 * @return The re-formatted string. 1528 * 1529 * @since 0.0.5 1530 */ 1531 @API( status = STABLE, since = "0.0.5" ) 1532 public static final String pad( final CharSequence input, final int length, final char c, final Padding mode, final Clipping clip ) 1533 { 1534 //noinspection OverlyComplexBooleanExpression 1535 if( ((requireNonNullArgument( clip, "clip" ) == CLIPPING_ABBREVIATE) && (length < 4)) || ((clip == CLIPPING_ABBREVIATE_MIDDLE) && (length < 5)) ) 1536 { 1537 throw new ValidationException( "Length %d is too short for clipping mode %s".formatted( length, clip.toString() ) ); 1538 } 1539 requireNonNullArgument( mode, "mode" ); 1540 1541 final String retValue; 1542 final var currentLength = requireNonNullArgument( input, "input" ).length(); 1543 1544 if( (length > 0) && (length != currentLength) ) 1545 { 1546 if( currentLength > length ) 1547 { 1548 retValue = clip.clip( input, length ); 1549 } 1550 else 1551 { 1552 final var padSize = length - currentLength; 1553 retValue = mode.pad( input, padSize, c ); 1554 } 1555 } 1556 else 1557 { 1558 retValue = input.toString(); 1559 } 1560 1561 //---* Done *---------------------------------------------------------- 1562 return retValue; 1563 } // pad() 1564 1565 /** 1566 * <p>{@summary Fills up the given string to the given length by adding 1567 * blanks on both sides; will abbreviate the string if it is longer than 1568 * the given length.} The minimum length is 5.</p> 1569 * <p>This is a shortcut to a call to 1570 * {@link #pad(CharSequence,int,char,Padding,Clipping) pad( input, length, ' ', PADDING_CENTER, CLIPPING_ABBREVIATE_MIDDLE ) }.</p> 1571 * 1572 * @param input The string to format. 1573 * @param length The desired length; minimum value is 5. 1574 * @return The re-formatted string. 
1575 * 1576 * @see Padding#PADDING_CENTER 1577 * @see Clipping#CLIPPING_ABBREVIATE_MIDDLE 1578 * 1579 * @since 0.0.5 1580 */ 1581 @API( status = STABLE, since = "0.0.5" ) 1582 public static final String padCenter( final CharSequence input, final int length ) { return pad( input, length, ' ', PADDING_CENTER, CLIPPING_ABBREVIATE_MIDDLE ); } 1583 1584 /** 1585 * <p>{@summary Fills up the given string to the given length by adding 1586 * blanks on the left side; will abbreviate the string if it is longer 1587 * than the given length.} The minimum length is 4.</p> 1588 * <p>This is a shortcut to a call to 1589 * {@link #pad(CharSequence,int,char,Padding,Clipping) pad( input, length, ' ', PADDING_LEFT, CLIPPING_ABBREVIATE ) }.</p> 1590 * 1591 * @param input The string to format. 1592 * @param length The desired length; the minimum value is 4. 1593 * @return The re-formatted string. 1594 * 1595 * @see Padding#PADDING_LEFT 1596 * @see Clipping#CLIPPING_ABBREVIATE 1597 * 1598 * @since 0.0.5 1599 */ 1600 @API( status = STABLE, since = "0.0.5" ) 1601 public static final String padLeft( final CharSequence input, final int length ) { return pad( input, length, ' ', PADDING_LEFT, CLIPPING_ABBREVIATE ); } 1602 1603 /** 1604 * <p>{@summary Fills up the given string to the given length by adding 1605 * blanks on the right side; will abbreviate the string if it is longer 1606 * than the given length.} The minimum length is 4.</p> 1607 * <p>This is a shortcut to a call to 1608 * {@link #pad(CharSequence,int,char,Padding,Clipping) pad( input, length, ' ', PADDING_RIGHT, CLIPPING_ABBREVIATE ) }.</p> 1609 * 1610 * @param input The string to format. 1611 * @param length The desired length; the minimum value is 4. 1612 * @return The re-formatted string. 
1613 * 1614 * @see Padding#PADDING_RIGHT 1615 * @see Clipping#CLIPPING_ABBREVIATE 1616 * 1617 * @since 0.0.5 1618 */ 1619 @API( status = STABLE, since = "0.0.5" ) 1620 public static final String padRight( final CharSequence input, final int length ) { return pad( input, length, ' ', PADDING_RIGHT, CLIPPING_ABBREVIATE ); } 1621 1622 /** 1623 * <p>{@summary Surrounds the given String with double-quotes 1624 * (", &#34;).}</p> 1625 * <p>When the double-quote is needed in a String constant, it has to be 1626 * escaped with a backslash:</p> 1627 * <pre><code>"\"…\""</code></pre> 1628 * <p>Sometimes, this is just ugly, and there this method comes into 1629 * play.</p> 1630 * 1631 * @param input The String to surround; can be {@code null}. 1632 * @return The quoted String; will be {@code null} if the argument was 1633 * {@code null} already. 1634 */ 1635 public static final String quote( final CharSequence input ) 1636 { 1637 final var retValue = isNull( input ) ? null : String.format( "\"%s\"", input ); 1638 1639 //---* Done *---------------------------------------------------------- 1640 return retValue; 1641 } // quote() 1642 1643 /** 1644 * <p>{@summary This method replaces all diacritical characters in the 1645 * input String by their base form.} That means that 'ü' gets 'u', `È' 1646 * gets 'E' and so on).</p> 1647 * <p>This differs from 1648 * {@link #normalizeToASCII(CharSequence)} 1649 * as this method still allows non-ASCII characters in the output.</p> 1650 * 1651 * @note The scandinavian letters 'ø' and 'Ø' are not diacritical 1652 * letters, meaning they will not be replaced. 1653 * 1654 * @param input The input string. 1655 * @return The normalised String, not containing any diacritical 1656 * characters. 1657 * 1658 * TODO Check the implementation and the results!! 
2022-12-10 1659 */ 1660 public static final String removeDiacriticalMarks( final CharSequence input ) 1661 { 1662 final var retValue = normalize( requireNonNullArgument( input, "input" ), NFD ) 1663 .replaceAll("\\p{InCombiningDiacriticalMarks}+", EMPTY_STRING ); 1664 1665 //---* Done *---------------------------------------------------------- 1666 return retValue; 1667 } // removeDiacriticalMarks() 1668 1669 /** 1670 * Repeats the given char {@code repeat} to form a new String. The table 1671 * below shows the various result for some argument combinations.<br> 1672 * <br><code> 1673 * StringUtils.repeat( 'a', 0 ) ⇒ ""<br> 1674 * StringUtils.repeat( 'a', 3 ) ⇒ "aaa"<br> 1675 * StringUtils.repeat( 'a', -2 ) ⇒ ""<br> 1676 * </code> 1677 * 1678 * @param c The character to repeat. 1679 * @param count The number of times to repeat {@code c}; a negative 1680 * value will be treated as zero. 1681 * @return A new String consisting of the given character repeated 1682 * {@code count} times, or the empty String if {@code count} was 0 1683 * or negative. 1684 * 1685 * @see String#repeat(int) 1686 * 1687 * @since 0.0.5 1688 */ 1689 @API( status = STABLE, since = "0.0.5" ) 1690 public static final String repeat( final char c, final int count ) 1691 { 1692 final var retValue = ( count > 0 ? Character.toString( c ).repeat( count ) : EMPTY_STRING).intern(); 1693 1694 //---* Done *---------------------------------------------------------- 1695 return retValue; 1696 } // repeat() 1697 1698 /** 1699 * Repeats the given char {@code repeat}, identified by its code point, to 1700 * form a new String. The 1701 * table below shows the various result for some argument 1702 * combinations.<br> 1703 * <br><code> 1704 * StringUtils.repeat( 'a', 0 ) ⇒ ""<br> 1705 * StringUtils.repeat( 'a', 3 ) ⇒ "aaa"<br> 1706 * StringUtils.repeat( 'a', -2 ) ⇒ ""<br> 1707 * </code> 1708 * 1709 * @param codePoint The character to repeat. 
1710 * @param count The number of times to repeat {@code c}; a negative 1711 * value will be treated as zero. 1712 * @return A new String consisting of the given character repeated 1713 * {@code count} times, or the empty String if {@code count} was 0 1714 * or negative, or {@code null} if the code point is invalid. 1715 * 1716 * @see Character#isValidCodePoint(int) 1717 * @see String#repeat(int) 1718 * 1719 * @since 0.0.5 1720 */ 1721 @API( status = STABLE, since = "0.0.5" ) 1722 public static final String repeat( final int codePoint, final int count ) 1723 { 1724 final var retValue = (count > 0) 1725 ? isValidCodePoint( codePoint ) 1726 ? Character.toString( codePoint ).repeat( count ).intern() 1727 : null 1728 : EMPTY_STRING; 1729 1730 //---* Done *---------------------------------------------------------- 1731 return retValue; 1732 } // repeat() 1733 1734 /** 1735 * Repeats the given String {@code repeat} times to form a new String. The 1736 * table below shows the various result for some argument 1737 * combinations.<br> 1738 * <br><code> 1739 * StringUtils.repeat( null, 2 ) ⇒ null<br> 1740 * StringUtils.repeat( "", 0 ) ⇒ ""<br> 1741 * StringUtils.repeat( "", 2 ) ⇒ ""<br> 1742 * StringUtils.repeat( "a", 3 ) ⇒ "aaa"<br> 1743 * StringUtils.repeat( "ab", 2 ) ⇒ "abab"<br> 1744 * StringUtils.repeat( "a", -2 ) ⇒ ""<br> 1745 * </code> 1746 * 1747 * @param input The String to repeat, may be {@code null}. 1748 * @param count The number of times to repeat {@code str}; a negative 1749 * value will be treated as zero. 1750 * @return A new String consisting of the original String repeated, 1751 * {@code count} times, the empty String if {@code count} was 0 1752 * or negative, or {@code null} if the input String was 1753 * {@code null}, too. 
1754 * 1755 * @see String#repeat(int) 1756 * 1757 * @since 0.0.5 1758 */ 1759 @API( status = STABLE, since = "0.0.5" ) 1760 public static final String repeat( final CharSequence input, final int count ) 1761 { 1762 final var retValue = 1763 nonNull( input ) 1764 ? (count > 0) && !input.isEmpty() 1765 ? input.toString().repeat( count ) 1766 : EMPTY_STRING 1767 : null; 1768 1769 //---* Done *---------------------------------------------------------- 1770 return retValue; 1771 } // repeat() 1772 1773 /** 1774 * <p>{@summary Splits a String by the given separator character and 1775 * returns an array of all parts.} In case a separator character is 1776 * immediately followed by another separator character, an empty String 1777 * will be placed to the array.</p> 1778 * <p>Beginning and end of the String are treated as separators, so if the 1779 * first character of the String is a separator, the returned array will 1780 * start with an empty String, as it will end with an empty String if the 1781 * last character is a separator.</p> 1782 * <p>In case the String is empty, the return value will be an array 1783 * containing just the empty String. It will not be empty.</p> 1784 * 1785 * @param input The String to split. 1786 * @param separator The separator character. 1787 * @return The parts of the String. 
1788 * 1789 * @since 0.0.5 1790 */ 1791 @API( status = STABLE, since = "0.0.5" ) 1792 public static final String [] splitString( final CharSequence input, final char separator ) 1793 { 1794 return splitString( input, (int) separator ); 1795 } // splitString() 1796 1797 /** 1798 * <p>{@summary Splits a String by the given separator character, 1799 * identified by its Unicode code point, and returns an array of all 1800 * parts.} In case a separator character is immediately followed by 1801 * another separator character, an empty String will be placed to the 1802 * array.</p> 1803 * <p>Beginning and end of the String are treated as separators, so if the 1804 * first character of the String is a separator, the returned array will 1805 * start with an empty String, as it will end with an empty String if the 1806 * last character is a separator.</p> 1807 * <p>In case the String is empty, the return value will be an array 1808 * containing just the empty String. It will not be empty.</p> 1809 * 1810 * @param input The String to split. 1811 * @param separator The code point for the separator character. 1812 * @return The parts of the String. 
1813 * 1814 * @since 0.0.5 1815 */ 1816 @API( status = STABLE, since = "0.0.5" ) 1817 public static final String [] splitString( final CharSequence input, final int separator ) 1818 { 1819 final var retValue = stream( input, separator ).toArray( String []::new ); 1820 1821 //---* Done *---------------------------------------------------------- 1822 return retValue; 1823 } // splitString() 1824 1825 /** 1826 * <p>{@summary Splits a String by the given separator sequence and 1827 * returns an array of all parts.} In case a separator sequence is 1828 * immediately followed by another separator sequence, an empty String 1829 * will be placed to the array.</p> 1830 * <p>Beginning and end of the String are treated as separators, so if the 1831 * first part of the String equals the separator sequence, the returned 1832 * array will start with an empty String, as it will end with an empty 1833 * String if the last part would equal the separator sequence.</p> 1834 * <p>In case the String is empty, the return value will be an array 1835 * containing just the empty String. It will not be empty.</p> 1836 * 1837 * @param input The String to split. 1838 * @param separator The separator sequence. 1839 * @return The parts of the String. 
1840 * 1841 * @since 0.0.5 1842 */ 1843 @API( status = STABLE, since = "0.0.5" ) 1844 public static final String [] splitString( final CharSequence input, final CharSequence separator ) 1845 { 1846 final var retValue = stream( input, separator).toArray( String []::new ); 1847 1848 //---* Done *---------------------------------------------------------- 1849 return retValue; 1850 } // splitString() 1851 1852 /** 1853 * <p>{@summary Splits a String by the given separator character and 1854 * returns an instance of 1855 * {@link Stream} 1856 * providing all parts.} In case a separator character is immediately 1857 * followed by another separator character, an empty String will be put to 1858 * the {@code Stream}.</p> 1859 * <p>Beginning and end of the String are treated as separators, so if the 1860 * first character of the String is a separator, the returned 1861 * {@code Stream} will start with an empty String, as it will end with an 1862 * empty String if the last character is a separator.</p> 1863 * <p>In case the String is empty, the return value will be a 1864 * {@code Stream} containing just the empty String. It will not be 1865 * empty.</p> 1866 * 1867 * @param input The String to split. 1868 * @param separator The separator character. 1869 * @return A {@code Stream} instance with the parts of the String. 
1870 * 1871 * @since 0.0.7 1872 */ 1873 @API( status = STABLE, since = "0.0.7" ) 1874 public static final Stream<String> stream( final CharSequence input, final char separator ) 1875 { 1876 return stream( input, (int) separator ); 1877 } // stream() 1878 1879 /** 1880 * <p>{@summary Splits a String by the given separator character, identified by its 1881 * Unicode code point, and returns a 1882 * {@link Stream} 1883 * of all parts.} In case a separator character is immediately followed by 1884 * another separator char, an empty String will be put to the 1885 * {@code Stream}.</p> 1886 * <p>Beginning and end of the String are treated as 1887 * separators, so if the first character of the String is a separator, the 1888 * returned {@code Stream} will start with an empty String, as it will end 1889 * with an empty String if the last character is a separator.</p> 1890 * <p>In case the String is empty, the return value will be a 1891 * {@code Stream} containing just the empty String. It will not be 1892 * empty.</p> 1893 * 1894 * @param input The String to split. 1895 * @param separator The code point for the separator character. 1896 * @return A {@code Stream} instance with the parts of the String. 
1897 * 1898 * @since 0.0.7 1899 */ 1900 @API( status = STABLE, since = "0.0.7" ) 1901 public static final Stream<String> stream( final CharSequence input, final int separator ) 1902 { 1903 //---* Process the string *-------------------------------------------- 1904 final var codepoints = requireNonNullArgument( input, "input" ).codePoints().toArray(); 1905 final var builder = Stream.<String>builder(); 1906 var begin = -1; 1907 for( var i = 0 ; i < codepoints.length; ++i ) 1908 { 1909 if( begin == -1 ) 1910 { 1911 begin = i; 1912 } 1913 if( codepoints [i] == separator ) 1914 { 1915 builder.add( new String( codepoints, begin, i - begin ).intern() ); 1916 begin = -1; 1917 } 1918 } 1919 1920 //---* Add the rest *-------------------------------------------------- 1921 if( begin >= 0 ) 1922 { 1923 builder.add( new String( codepoints, begin, codepoints.length - begin ).intern() ); 1924 } 1925 if( (codepoints.length == 0) || (codepoints [codepoints.length - 1] == separator) ) 1926 { 1927 builder.add( EMPTY_STRING ); 1928 } 1929 1930 //---* Create the return value *--------------------------------------- 1931 final var retValue = builder.build(); 1932 1933 //---* Done *---------------------------------------------------------- 1934 return retValue; 1935 } // stream() 1936 1937 /** 1938 * <p>{@summary Splits a String by the given separator sequence and 1939 * returns an instance of 1940 * {@link Stream} 1941 * containing all parts.} In case a separator sequence is immediately 1942 * followed by another separator sequence, an empty String will be put to 1943 * the {@code Stream}.</p> 1944 * <p>Beginning and end of the String are treated as separators, so if the 1945 * first part of the String equals the separator sequence, the returned 1946 * {@code Stream} will start with an empty string, as it will end with an 1947 * empty String if the last part would equal the separator sequence.</p> 1948 * <p>In case the String is empty, the return value will be a 1949 * {@code Stream} 
containing just the empty String. It will not be 1950 * empty.</p> 1951 * 1952 * @param input The String to split. 1953 * @param separator The separator sequence. 1954 * @return The parts of the String. 1955 * 1956 * @since 0.0.7 1957 */ 1958 @API( status = STABLE, since = "0.0.7" ) 1959 public static final Stream<String> stream( final CharSequence input, final CharSequence separator ) 1960 { 1961 //---* Process the string *-------------------------------------------- 1962 var s = requireNonNullArgument( input, "input" ).toString(); 1963 final var t = requireNotEmptyArgument( separator, "separator" ).toString(); 1964 1965 final var builder = Stream.<String>builder(); 1966 var pos = Integer.MAX_VALUE; 1967 while( isNotEmpty( s ) && (pos >= 0) ) 1968 { 1969 pos = s.indexOf( t ); 1970 switch( Integer.signum( pos ) ) 1971 { 1972 case 0 -> /* String starts with separator */ 1973 { 1974 builder.add( EMPTY_STRING ); 1975 s = s.substring( t.length() ); 1976 } 1977 case 1 -> /* String contains a separator somewhere */ 1978 { 1979 builder.add( s.substring( 0, pos ) ); 1980 s = s.substring( pos + t.length() ); 1981 } 1982 default -> { /* Just leave the loop */ } 1983 } // ResultHandlerSwitch: 1984 } 1985 1986 //---* Add the rest *-------------------------------------------------- 1987 builder.add( s ); 1988 1989 //---* Create the return value *--------------------------------------- 1990 final var retValue = builder.build(); 1991 1992 //---* Done *---------------------------------------------------------- 1993 return retValue; 1994 } // stream() 1995 1996 /** 1997 * <p>{@summary Splits a String using the given regular expression and 1998 * returns an instance of 1999 * {@link Stream} 2000 * providing all parts.} In case a separator sequence is immediately 2001 * followed by another separator sequence, an empty String will be put to 2002 * the {@code Stream}.</p> 2003 * <p>Beginning and end of the String are treated as separators, so if the 2004 * first part of the String 
equals the separator sequence, the returned 2005 * {@code Stream} will start with an empty string, as it will end with an 2006 * empty String if the last part would equal the separator sequence.</p> 2007 * <p>In case the String is empty, the return value will be a 2008 * {@code Stream} containing just the empty String. It will not be 2009 * empty.</p> 2010 * 2011 * @note This method behaves different from 2012 * {@link String#split(String)} 2013 * as it will return trailing empty Strings. 2014 * 2015 * @param input The String to split. 2016 * @param pattern The separator sequence. 2017 * @return The parts of the String. 2018 * 2019 * @see String#split(String) 2020 * @see Pattern#split(CharSequence) 2021 * 2022 * @since 0.0.7 2023 */ 2024 @API( status = STABLE, since = "0.0.7" ) 2025 public static final Stream<String> stream( final CharSequence input, final Pattern pattern ) 2026 { 2027 requireNonNullArgument( pattern, "pattern" ); 2028 2029 //---* Process the string *-------------------------------------------- 2030 final var builder = Stream.<String>builder(); 2031 if( isEmpty( requireNonNullArgument( input, "s" ) ) ) 2032 { 2033 builder.add( EMPTY_STRING ); 2034 } 2035 else 2036 { 2037 final var parts = pattern.split( input ); 2038 for( final var part : parts ) 2039 { 2040 builder.add( part ); 2041 } 2042 final var matcher = pattern.matcher( input ); 2043 var count = 0; 2044 while( matcher.find() ) ++count; 2045 //noinspection ForLoopWithMissingComponent 2046 for( ; count >= parts.length; --count ) 2047 { 2048 builder.add( EMPTY_STRING ); 2049 } 2050 } 2051 2052 //---* Create the return value *--------------------------------------- 2053 final var retValue = builder.build(); 2054 2055 //---* Done *---------------------------------------------------------- 2056 return retValue; 2057 } // stream() 2058 2059 /** 2060 * Strips HTML or XML tags from the given String, without touching other 2061 * entities (like {@code &} or {@code }). 
The result would be 2062 * the effective text, stripped from all other whitespace (except single 2063 * blanks).<br> 2064 * <br>This means that the result for 2065 * <pre><code>stripTags( "<html> 2066 * <head> 2067 * … 2068 * </head> 2069 * <body> 2070 * <a href='…'> Simple <br> 2071 * <br> Text </a> 2072 * </body> 2073 * </html>" )</code></pre> would be just 2074 * "{@code Simple Text}".<br> 2075 * <br>Comments will be stripped as well, and {@code <pre>} tags are not 2076 * interpreted, with the consequence that any formatting with whitespace 2077 * gets lost. {@code CDATA} elements are stripped, too. 2078 * 2079 * @param input The HTML/XML string. 2080 * @return The string without the tags. 2081 * 2082 * @since 0.0.7 2083 */ 2084 @API( status = STABLE, since = "0.0.5" ) 2085 public static final String stripTags( final CharSequence input ) 2086 { 2087 final var retValue = new StringBuilder(); 2088 if( isNotEmptyOrBlank( requireNonNullArgument( input, "input" ) ) ) 2089 { 2090 final var matcher = m_TagRemovalPattern.matcher( input ); 2091 final var buffer = matcher.replaceAll( " " ).trim().codePoints().toArray(); 2092 int lastChar = NULL_CHAR; 2093 ScanLoop: for( final var codePoint : buffer ) 2094 { 2095 if( isWhitespace( codePoint ) ) 2096 { 2097 //---* Consecutive whitespace detected *------------------- 2098 if( isWhitespace( lastChar ) ) continue ScanLoop; 2099 2100 //---* All resulting whitespace have to be blanks *-------- 2101 retValue.append( " " ); 2102 } 2103 else 2104 { 2105 //---* Write the character *------------------------------- 2106 retValue.append( toChars( codePoint ) ); 2107 } 2108 lastChar = codePoint; 2109 } // ScanLoop: 2110 } 2111 2112 //---* Done *---------------------------------------------------------- 2113 return retValue.toString(); 2114 } // stripTags() 2115 2116 /** 2117 * <p>{@summary Strips characters from the given input that are not 2118 * allowed (or should be at least avoided) for a file or folder name on 2119 * most or all 
operating systems.}</p> 2120 * <p>The following characters will be stripped:</p> 2121 * <dl> 2122 * <dt><b>:</b> (colon)</dt><dd>On Windows systems it is used to separate 2123 * the drive letter from the path and file name; on Unix-like operating 2124 * systems (including MacOS) it would be valid, but it can cause issues on 2125 * the {@code PATH} and {@code CLASSPATH} variables on these operating 2126 * systems.</dd> 2127 * <dt><b>\</b> (backslash)</dt><dd>On Windows systems it is used as the 2128 * path separator, while on Unix-like operating systems it is problematic 2129 * in other ways. For example, it is used to escape blanks in not-quoted 2130 * file or folder names.</dd> 2131 * <dt><b>/</b> (slash or forward slash)</dt><dd>The path separator on 2132 * Unix-like operating systems, but Java will use it that way on Windows 2133 * systems, too.</dd> 2134 * <dt><b>;</b> (semicolon)</dt><dd>It can cause issues on the {@code PATH} 2135 * and {@code CLASSPATH} variables on Windows.</dd> 2136 * <dt><b>*</b> (asterisk)</dt><dd>The asterisk is often used as wild card 2137 * character in shell programs to find groups of files; using it in a file 2138 * name can cause funny effects.</dd> 2139 * <dt><b>?</b> (question mark)</dt><dd>The question mark is used on 2140 * Windows as a wild card for a single character; similar to the asterisk, 2141 * it can cause funny effects when used in a file name.</dd> 2142 * <dt><b>"</b> (double quotes)</dt> 2143 * <dt><b>'</b> (single quotes)</dt><dd>Both have some potential to 2144 * confuse the various shell programs of all operating systems.</dd> 2145 * <dt><b>@</b> ('at'-sign)</dt><dd>Although it is allowed for file and 2146 * folder names, it causes issues when used in the URL for that respective 2147 * file.</dd> 2148 * <dt><b>|</b> (pipe symbol)</dt><dd>Similar to the '*' (asterisk), the 2149 * pipe-symbol has – as the name already indicates - a meaning on most 2150 * shells that would make it difficult to manage files that 
contains this 2151 * character in their names.</dd> 2152 * <dt><b><</b> (less than)</dt> 2153 * <dt><b>></b> (greater than)</dt><dd>Like the pipe, these two have a 2154 * meaning on most shells that would make it difficult to manage files 2155 * that contains one of these characters in their names.</dd> 2156 * <dt>Whitespace</dt><dd>Only blanks will remain, any other whitespace 2157 * characters are stripped.</dd> 2158 * </dl> 2159 * <p>Finally, the method will strip all leading and trailing blanks; 2160 * although blanks are usually allowed, they are confusing when not 2161 * surrounded by some visible characters.</p> 2162 * <p>Especially regarding the characters that are critical for shells 2163 * ('*', '?', '"', ''', '|', '<', and '>') this method is 2164 * over-cautious, as most shells could handle them after proper escaping 2165 * the offending characters or quoting the file name.</p> 2166 * <p>This method furthermore assumes that any other Unicode character is 2167 * valid for a file or folder name; unfortunately, there are filesystems 2168 * where this is not true.</p> 2169 * 2170 * @note This method will not take care about the length of the returned 2171 * String; this means the result to a call to this method may still be 2172 * invalid as a file or folder name because it is too long. 2173 * 2174 * @param input The input String, denoting a file or folder name - 2175 * <i>not</i> a full path. 2176 * @return The String without the characters that are invalid for a file 2177 * name. This value will never be {@code null} or empty. 2178 * @throws NullArgumentException The input is {@code null}. 2179 * @throws EmptyArgumentException The input is the empty String. 2180 * @throws ValidationException After stripping the invalid characters the 2181 * return value would be empty. 
2182 * 2183 * @since 0.0.5 2184 */ 2185 @SuppressWarnings( "SwitchStatementWithTooManyBranches" ) 2186 @API( status = STABLE, since = "0.0.5" ) 2187 public static final String stripToFilename( final CharSequence input ) throws ValidationException 2188 { 2189 final var len = requireNotEmptyArgument( input, "input" ).length(); 2190 final var buffer = new StringBuilder( len ); 2191 ScanLoop: for( var i = 0; i < len; ++i ) 2192 { 2193 final var currentCharacter = input.charAt( i ); 2194 Selector: 2195 //noinspection SwitchStatementWithTooManyBranches,EnhancedSwitchMigration 2196 switch( currentCharacter ) 2197 { 2198 case ':': 2199 case '\\': 2200 case '/': 2201 case ';': 2202 case '*': 2203 case '"': 2204 case '\'': 2205 case '@': 2206 case '|': 2207 case '?': 2208 case '<': 2209 case '>': 2210 continue ScanLoop; 2211 2212 default: 2213 { 2214 if( (currentCharacter == ' ') || (!isISOControl( currentCharacter ) && !isWhitespace( currentCharacter )) ) 2215 { 2216 buffer.append( currentCharacter ); 2217 } 2218 break Selector; 2219 } 2220 } // Selector: 2221 } // ScanLoop: 2222 2223 final var retValue = buffer.toString().trim(); 2224 if( retValue.isEmpty() ) 2225 { 2226 throw new ValidationException( "After stripping the invalid characters from '%1$s' there do not remain enough characters for a valid file name".formatted( input.toString() ) ); 2227 } 2228 2229 //---* Done *---------------------------------------------------------- 2230 return retValue; 2231 } // stripToFilename() 2232 2233 /** 2234 * Strips HTML or XML comments from the given String. 2235 * 2236 * @param input The HTML/XML string. 2237 * @return The string without the comments. 
2238 * 2239 * @since 0.0.5 2240 */ 2241 @API( status = STABLE, since = "0.0.5" ) 2242 public static final String stripXMLComments( final CharSequence input ) 2243 { 2244 final var matcher = m_CommentRemovalPattern.matcher( requireNonNullArgument( input, "input" ) ); 2245 final var retValue = matcher.replaceAll( EMPTY_STRING ); 2246 2247 //---* Done *---------------------------------------------------------- 2248 return retValue; 2249 } // stripXMLComments() 2250 2251 /** 2252 * <p>{@summary Gets the String that is nested in between two Strings.} 2253 * Only the first match is returned.</p> 2254 * <p>A {@code null} input String returns {@code null}. A {@code null} 2255 * open/close returns {@code null} (no match). An empty ("") 2256 * open and close returns an empty string.</p> 2257 * <pre><code> 2258 * substringBetween( "wx[b]yz", "[", "]" ) = "b" 2259 * substringBetween( null, *, * ) = Optional.empty() 2260 * substringBetween( *, null, * ) = Optional.empty() 2261 * substringBetween( *, *, null ) = Optional.empty() 2262 * substringBetween( "", "", "" ) = "" 2263 * substringBetween( "", "", "]" ) = Optional.empty() 2264 * substringBetween( "", "[", "]" ) = Optional.empty() 2265 * substringBetween( "yabcz", "", "" ) = "" 2266 * substringBetween( "yabcz", "y", "z" ) = "abc" 2267 * substringBetween( "yabczyabcz", "y", "z" ) = "abc" 2268 * </code></pre> 2269 * 2270 * @inspired Apache Commons Lang 2271 * 2272 * @param input The String containing the substring, may be 2273 * {@code null}. 2274 * @param open The String before the substring, may be {@code null}. 2275 * @param close The String after the substring, may be {@code null}. 
2276 * @return An instance of 2277 * {@link Optional} 2278 * that holds the found substring; will be 2279 * {@linkplain Optional#empty() empty} if no match 2280 * 2281 * @since 0.4.8 2282 */ 2283 @API( status = STABLE, since = "0.4.8" ) 2284 public static final Optional<String> substringBetween( final String input, final String open, final String close ) 2285 { 2286 String found = null; 2287 2288 if( Stream.of( input, open, close ).allMatch( Objects::nonNull ) ) 2289 { 2290 if( open.isEmpty() && close.isEmpty() ) 2291 { 2292 found = EMPTY_STRING; 2293 } 2294 else 2295 { 2296 final var start = input.indexOf(open); 2297 if( start >= 0 ) 2298 { 2299 final var end = input.indexOf( close, start + open.length() ); 2300 if( end > 0 ) 2301 { 2302 found = input.substring( start + open.length(), end ); 2303 } 2304 } 2305 } 2306 } 2307 final var retValue = Optional.ofNullable( found ); 2308 2309 //---* Done *---------------------------------------------------------- 2310 return retValue; 2311 } // substringBetween() 2312 2313 /** 2314 * <p>{@summary Searches a String for substrings delimited by a start and 2315 * end tag, returning all matching substrings in a 2316 * {@link java.util.SequencedCollection Collection}.} That collection is 2317 * empty if no match was found.</p> 2318 * <p>No match can be found in a {@code null} input String; same for a 2319 * {@code null} or an empty ("") open or close.</p> 2320 * <pre><code> 2321 * substringsBetween( "[a][b][c]", "[", "]" ) = ["a","b","c"] 2322 * substringsBetween( null, *, * ) = [] 2323 * substringsBetween( *, null, * ) = [] 2324 * substringsBetween( *, *, null ) = [] 2325 * substringsBetween( "", "[", "]" ) = [] 2326 * </code></pre> 2327 * 2328 * @param input The String containing the substrings, may be 2329 * {@code null}. 2330 * @param open The String identifying the start of the substring, may 2331 * be {@code null}. 2332 * @param close The String identifying the end of the substring, may be 2333 * {@code null}. 
2334 * @return A 2335 * {@link SequencedCollection Collection} 2336 * with the found substrings, in the sequence they have in the input 2337 * String. The collection is mutable. 2338 * 2339 * @since 0.4.8 2340 */ 2341 @API( status = STABLE, since = "0.4.8" ) 2342 public static final SequencedCollection<String> substringsBetween( final String input, final String open, final String close) 2343 { 2344 final SequencedCollection<String> retValue = new ArrayList<>(); 2345 2346 if( Stream.of( input, open, close ).allMatch( StringUtils::isNotEmpty ) ) 2347 { 2348 final var strLen = input.length(); 2349 final var closeLen = close.length(); 2350 final var openLen = open.length(); 2351 var pos = 0; 2352 ScanLoop: while( pos < strLen - closeLen ) 2353 { 2354 var start = input.indexOf( open, pos ); 2355 if( start < 0 ) break ScanLoop; 2356 start += openLen; 2357 final var end = input.indexOf( close, start ); 2358 if (end < 0) break ScanLoop; 2359 retValue.add( input.substring( start, end ) ); 2360 pos = end + closeLen; 2361 } // ScanLoop: 2362 } 2363 2364 //---* Done *---------------------------------------------------------- 2365 return retValue; 2366 } // substringsBetween() 2367 2368 /** 2369 * Unescapes a string containing entity escapes to a string containing the 2370 * actual Unicode characters corresponding to the escapes. Supports HTML 2371 * 5.0 entities.<br> 2372 * <br>For example, the string 2373 * "&lt;Fran&ccedil;ais&gt;" will become 2374 * "<Français>".<br> 2375 * <br>If an entity is unrecognised, it is left alone, and inserted 2376 * verbatim into the result string. e.g. "&gt;&zzzz;x" 2377 * will become ">&zzzz;x". 2378 * 2379 * @param input The {@code String} to unescape, may be {@code null}. 2380 * @return A new unescaped {@code String}, {@code null} if the given 2381 * string was already {@code null}. 
2382 * 2383 * @see #escapeHTML(CharSequence) 2384 * @see #escapeHTML(Appendable,CharSequence) 2385 * 2386 * @since 0.0.5 2387 */ 2388 @API( status = STABLE, since = "0.0.5" ) 2389 public static final String unescapeHTML( final CharSequence input ) 2390 { 2391 final var retValue = nonNull( input ) ? HTML50.unescape( input ) : null; 2392 2393 //---* Done *---------------------------------------------------------- 2394 return retValue; 2395 } // unescapeHTML() 2396 2397 /** 2398 * Unescapes a string containing entity escapes to a string containing the 2399 * actual Unicode characters corresponding to the escapes and writes it to 2400 * the given 2401 * {@link Appendable}. 2402 * Supports HTML 4.0 entities.<br> 2403 * <br>For example, the string 2404 * "&lt;Fran&ccedil;ais&gt;" will become 2405 * "<Français>".<br> 2406 * <br>If an entity is unrecognised, it is left alone, and inserted 2407 * verbatim into the result string. e.g. "&gt;&zzzz;x" 2408 * will become ">&zzzz;x". 2409 * 2410 * @param appendable The appendable receiving the unescaped string. 2411 * @param input The {@code String} to unescape, may be {@code null}. 2412 * @throws NullArgumentException The appendable is {@code null}. 2413 * @throws IOException An IOException occurred. 2414 * 2415 * @see #escapeHTML(CharSequence) 2416 * 2417 * @since 0.0.5 2418 */ 2419 @API( status = STABLE, since = "0.0.5" ) 2420 public static final void unescapeHTML( final Appendable appendable, final CharSequence input ) throws IOException 2421 { 2422 requireNonNullArgument( appendable, "appendable" ); 2423 2424 if( nonNull( input ) ) HTML50.unescape( appendable, input ); 2425 } // unescapeHTML() 2426 2427 /** 2428 * <p>{@summary Unescapes an XML string containing XML entity escapes to a 2429 * string containing the actual Unicode characters corresponding to the 2430 * escapes.}</p> 2431 * <p>If an entity is unrecognised, it is left alone, and inserted 2432 * verbatim into the result string. e.g. 
"&gt;&zzzz;x" 2433 * will become ">&zzzz;x".</p> 2434 * 2435 * @param input The {@code String} to unescape, may be {@code null}. 2436 * @return A new unescaped {@code String}, {@code null} if the given 2437 * string was already {@code null}. 2438 * 2439 * @see #escapeXML(CharSequence) 2440 * @see #escapeXML(Appendable,CharSequence) 2441 * 2442 * @since 0.0.5 2443 */ 2444 @API( status = STABLE, since = "0.0.5" ) 2445 public static final String unescapeXML( final CharSequence input ) 2446 { 2447 final var retValue = nonNull( input ) ? XML.unescape( input ) : null; 2448 2449 //---* Done *---------------------------------------------------------- 2450 return retValue; 2451 } // unescapeXML() 2452 2453 /** 2454 * <p>{@summary Unescapes an XML String containing XML entity escapes to a 2455 * String containing the actual Unicode characters corresponding to the 2456 * escapes and writes it to the given 2457 * {@link Appendable}.}</p> 2458 * <p>If an entity is unrecognised, it is left alone, and inserted 2459 * verbatim into the result string. e.g. "&gt;&zzzz;x" 2460 * will become ">&zzzz;x".</p> 2461 * 2462 * @param appendable The appendable receiving the unescaped string. 2463 * @param input The {@code String} to unescape, may be {@code null}. 2464 * @throws NullArgumentException The writer is {@code null}. 2465 * @throws IOException An IOException occurred. 
2466 * 2467 * @see #escapeXML(CharSequence) 2468 * 2469 * @since 0.0.5 2470 */ 2471 @API( status = STABLE, since = "0.0.5" ) 2472 public static final void unescapeXML( final Appendable appendable, final CharSequence input ) throws IOException 2473 { 2474 requireNonNullArgument( appendable, "appendable" ); 2475 2476 if( nonNull( input ) ) XML.unescape( appendable, input ); 2477 } // unescapeXML() 2478 2479 /** 2480 * Returns the given URL encoded String in its decoded form, using the 2481 * UTF-8 character encoding.<br> 2482 * <br>Internally, this method and 2483 * {@link #urlEncode(CharSequence)} 2484 * make use of the methods from 2485 * {@link java.net.URLDecoder} 2486 * and 2487 * {@link java.net.URLEncoder}, respectively. The methods here were 2488 * introduced to simplify the handling, as first only the UTF-8 encoding 2489 * should be used - making the second argument for the methods 2490 * {@link java.net.URLDecoder#decode(String, String) decode()}/ 2491 * {@link java.net.URLEncoder#encode(String, String) encode()} 2492 * obsolete - and second, they could throw an 2493 * {@link UnsupportedEncodingException} - although this should never occur 2494 * when UTF-8 encoding is used. 2495 * 2496 * @param input The input String. 2497 * @return The decoded result. 2498 * 2499 * @see java.net.URLDecoder#decode(String, String) 2500 * 2501 * @since 0.0.5 2502 */ 2503 @API( status = STABLE, since = "0.0.5" ) 2504 public static final String urlDecode( final CharSequence input ) 2505 { 2506 final var retValue = decode( requireNonNullArgument( input, "input" ).toString(), UTF8 ); 2507 2508 //---* Done *---------------------------------------------------------- 2509 return retValue; 2510 } // urlDecode() 2511 2512 /** 2513 * Returns the given String in its URL encoded form, using the 2514 * UTF-8 character encoding. 2515 * 2516 * @param input The input String. 2517 * @return The URL encoded result. 
2518 * 2519 * @see java.net.URLEncoder#encode(String, String) 2520 * @see #urlDecode(CharSequence) 2521 * 2522 * @since 0.0.5 2523 */ 2524 @API( status = STABLE, since = "0.0.5" ) 2525 public static final String urlEncode( final CharSequence input ) 2526 { 2527 final var retValue = encode( requireNonNullArgument( input, "input" ).toString(), UTF8 ); 2528 2529 //---* Done *---------------------------------------------------------- 2530 return retValue; 2531 } // urlEncode() 2532} 2533// class StringUtils 2534 2535/* 2536 * End of File 2537 */