|
| 1 | +/******************************************************************************* |
| 2 | + * jMCS project ( http://www.jmmc.fr/dev/jmcs ) |
| 3 | + ******************************************************************************* |
| 4 | + * Copyright (c) 2013, CNRS. All rights reserved. |
| 5 | + * |
| 6 | + * Redistribution and use in source and binary forms, with or without |
| 7 | + * modification, are permitted provided that the following conditions are met: |
| 8 | + * - Redistributions of source code must retain the above copyright |
| 9 | + * notice, this list of conditions and the following disclaimer. |
| 10 | + * - Redistributions in binary form must reproduce the above copyright |
| 11 | + * notice, this list of conditions and the following disclaimer in the |
| 12 | + * documentation and/or other materials provided with the distribution. |
| 13 | + * - Neither the name of the CNRS nor the names of its contributors may be |
| 14 | + * used to endorse or promote products derived from this software without |
| 15 | + * specific prior written permission. |
| 16 | + * |
| 17 | + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" |
| 18 | + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE |
| 19 | + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE |
| 20 | + * ARE DISCLAIMED. IN NO EVENT SHALL CNRS BE LIABLE FOR ANY DIRECT, INDIRECT, |
| 21 | + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT |
| 22 | + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, |
| 23 | + * OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF |
| 24 | + * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING |
| 25 | + * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, |
| 26 | + * EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. |
| 27 | + ******************************************************************************/ |
| 28 | +package fr.jmmc.jmcs.util; |
| 29 | + |
| 30 | +import java.text.Normalizer; |
| 31 | +import java.util.regex.Pattern; |
| 32 | + |
/**
 * Helper methods related to String handling: emptiness tests, accent removal,
 * white space / character class replacement and basic XML escaping.
 *
 * This class is copied from Jmcs (same package) in order to let OITools compile properly
 * but at runtime only one implementation will be loaded (by class loader)
 *
 * Note: Jmcs Changes must be reported here to avoid runtime issues !
 *
 * Null handling: only {@link #isSet(String)}, {@link #isEmpty(String)},
 * {@link #isTrimmedEmpty(String)} and {@link #cleanWhiteSpaces(String)} test for null;
 * every other method hands its input value directly to the JDK (regular expression
 * matcher or normalizer) and therefore expects a non-null value.
 *
 * Replacement strings: every {@code replaceBy} argument is given to
 * {@link java.util.regex.Matcher#replaceAll(String)}, so the characters {@code $} and
 * {@code \} keep their special meaning (group reference / escape) in it.
 *
 * @author Laurent BOURGES.
 */
public final class StringUtils {

    /** Empty String constant '' */
    public static final String STRING_EMPTY = "";
    /** String constant containing 1 space character ' ' */
    public static final String STRING_SPACE = " ";
    /** String constant containing 1 underscore character '_' */
    public static final String STRING_UNDERSCORE = "_";
    /** String constant containing 1 minus sign character '-' */
    public static final String STRING_MINUS_SIGN = "-";
    /** RegExp expression to match the underscore character '_' */
    private static final Pattern PATTERN_UNDERSCORE = Pattern.compile(STRING_UNDERSCORE);
    /** RegExp expression to match white spaces (1..n) */
    private static final Pattern PATTERN_WHITE_SPACE_MULTIPLE = Pattern.compile("\\s+");
    /** regular expression used to match characters different than alpha/numeric/_/+/- (1..n) */
    private static final Pattern PATTERN_NON_ALPHA_NUM = Pattern.compile("[^a-zA-Z_\\+\\-0-9]+");
    /**
     * regular expression used to match ONE character different than alpha/numeric/_/-/.
     * (no quantifier: each invalid character is matched, hence replaced, individually)
     */
    private static final Pattern PATTERN_NON_FILE_NAME = Pattern.compile("[^a-zA-Z0-9\\-_\\.]");
    /** regular expression used to match combining diacritical marks (1..n) i.e. accents once decomposed */
    private static final Pattern PATTERN_ACCENT_CHARS = Pattern.compile("\\p{InCombiningDiacriticalMarks}+");
    /** regular expression used to match characters different than numeric (1..n) */
    private static final Pattern PATTERN_NON_NUM = Pattern.compile("[^0-9]+");
    /** RegExp expression to match the line feed character '\n' */
    private static final Pattern PATTERN_CR = Pattern.compile("\n");
    /** RegExp expression to match tags i.e. the shortest text enclosed by '&lt;' and '&gt;' */
    private static final Pattern PATTERN_TAGS = Pattern.compile("\\<.*?\\>");
    /** RegExp expression to match the ampersand character */
    private static final Pattern PATTERN_AMP = Pattern.compile("&");
    /** RegExp expression to match the start tag character (less than sign) */
    private static final Pattern PATTERN_LT = Pattern.compile("<");
    /** RegExp expression to match the end tag character (greater than sign) */
    private static final Pattern PATTERN_GT = Pattern.compile(">");

    /**
     * Forbidden constructor (utility class)
     */
    private StringUtils() {
        // no instance
    }

    /**
     * Test if value is set ie not empty
     *
     * @param value string value (may be null)
     * @return true if value is NOT empty (neither null nor zero length)
     */
    public static boolean isSet(final String value) {
        return !isEmpty(value);
    }

    /**
     * Test if value is empty (null or no chars)
     *
     * @param value string value (may be null)
     * @return true if value is empty (null or no chars)
     */
    public static boolean isEmpty(final String value) {
        return value == null || value.length() == 0;
    }

    /**
     * Test if value is empty (null or no chars after trim)
     *
     * @param value string value (may be null)
     * @return true if value is empty (null or no chars after trim)
     */
    public static boolean isTrimmedEmpty(final String value) {
        return isEmpty(value) || value.trim().length() == 0;
    }

    /* --- accent handling -------------------------------------------------- */
    /**
     * Remove accents from any character i.e. remove diacritical marks
     * @param value input value (not null)
     * @return string value in decomposed form (NFD) without its combining diacritical marks
     */
    public static String removeAccents(final String value) {
        // Canonical decomposition splits an accented character into
        // its base character followed by combining marks (Java 1.6):
        final String normalized = Normalizer.normalize(value, Normalizer.Form.NFD);

        // then drop the combining marks:
        return PATTERN_ACCENT_CHARS.matcher(normalized).replaceAll(STRING_EMPTY);
    }

    /* --- common white space helper methods -------------------------------- */

    /**
     * Trim and remove redundant white space characters
     * @param value input value (may be null)
     * @return trimmed value whose white space runs are collapsed to 1 space character;
     * the empty string if value is null or empty
     */
    public static String cleanWhiteSpaces(final String value) {
        return isEmpty(value) ? STRING_EMPTY : replaceWhiteSpaces(value.trim(), STRING_SPACE);
    }

    /**
     * Remove any white space character
     * @param value input value (not null)
     * @return string value
     */
    public static String removeWhiteSpaces(final String value) {
        return replaceWhiteSpaces(value, STRING_EMPTY);
    }

    /**
     * Remove any underscore character
     * @param value input value (not null)
     * @return string value
     */
    public static String removeUnderscores(final String value) {
        return PATTERN_UNDERSCORE.matcher(value).replaceAll(STRING_EMPTY);
    }

    /**
     * Remove redundant white space characters i.e. replace white space characters (1..n)
     * by 1 space character (no trim)
     * @param value input value (not null)
     * @return string value
     */
    public static String removeRedundantWhiteSpaces(final String value) {
        return replaceWhiteSpaces(value, STRING_SPACE);
    }

    /**
     * Replace white space characters (1..n) by the underscore character
     * @param value input value (not null)
     * @return string value
     */
    public static String replaceWhiteSpacesByUnderscore(final String value) {
        return replaceWhiteSpaces(value, STRING_UNDERSCORE);
    }

    /**
     * Replace white space characters (1..n) by the minus sign character
     * @param value input value (not null)
     * @return string value
     */
    public static String replaceWhiteSpacesByMinusSign(final String value) {
        return replaceWhiteSpaces(value, STRING_MINUS_SIGN);
    }

    /**
     * Replace white space characters (1..n) by the given replacement string
     * @param value input value (not null)
     * @param replaceBy replacement string ('$' and '\' are special, see Matcher.replaceAll)
     * @return string value
     */
    public static String replaceWhiteSpaces(final String value, final String replaceBy) {
        return PATTERN_WHITE_SPACE_MULTIPLE.matcher(value).replaceAll(replaceBy);
    }

    /* --- common alpha numeric helper methods ------------------------------ */
    /**
     * Remove any non alpha numeric character
     * (characters '_', '+' and '-' are considered valid and kept)
     * @param value input value (not null)
     * @return string value
     */
    public static String removeNonAlphaNumericChars(final String value) {
        return replaceNonAlphaNumericChars(value, STRING_EMPTY);
    }

    /**
     * Replace non alpha numeric characters (1..n) by the underscore character
     * (characters '_', '+' and '-' are considered valid and kept)
     * @param value input value (not null)
     * @return string value
     */
    public static String replaceNonAlphaNumericCharsByUnderscore(final String value) {
        return replaceNonAlphaNumericChars(value, STRING_UNDERSCORE);
    }

    /**
     * Replace non alpha numeric characters (1..n) by the given replacement string
     * (characters '_', '+' and '-' are considered valid and kept)
     * @param value input value (not null)
     * @param replaceBy replacement string ('$' and '\' are special, see Matcher.replaceAll)
     * @return string value
     */
    public static String replaceNonAlphaNumericChars(final String value, final String replaceBy) {
        return PATTERN_NON_ALPHA_NUM.matcher(value).replaceAll(replaceBy);
    }

    /**
     * Replace non numeric characters (1..n) by the given replacement string
     * @param value input value (not null)
     * @param replaceBy replacement string ('$' and '\' are special, see Matcher.replaceAll)
     * @return string value
     */
    public static String replaceNonNumericChars(final String value, final String replaceBy) {
        return PATTERN_NON_NUM.matcher(value).replaceAll(replaceBy);
    }

    /**
     * Split the given value at non numeric characters (1..n)
     * @param value input value (not null)
     * @return numeric string values
     */
    public static String[] splitNonNumericChars(final String value) {
        return PATTERN_NON_NUM.split(value);
    }

    /* --- common file name helper methods ------------------------------ */
    /**
     * Replace every invalid file name character by the underscore character
     * (valid characters are alpha/numeric/_/-/.); consecutive invalid characters
     * are NOT merged: each one gives its own underscore
     * @param value input value (not null)
     * @return string value
     */
    public static String replaceNonFileNameCharsByUnderscore(final String value) {
        return replaceNonFileNameChars(value, STRING_UNDERSCORE);
    }

    /**
     * Replace every invalid file name character by the given replacement string
     * (valid characters are alpha/numeric/_/-/.); consecutive invalid characters
     * are NOT merged: each one is replaced individually
     * @param value input value (not null)
     * @param replaceBy replacement string ('$' and '\' are special, see Matcher.replaceAll)
     * @return string value
     */
    public static String replaceNonFileNameChars(final String value, final String replaceBy) {
        return PATTERN_NON_FILE_NAME.matcher(value).replaceAll(replaceBy);
    }

    /* --- common helper methods ------------------------------ */
    /**
     * Replace line feed characters '\n' by the given replacement string
     * (the carriage return character '\r' is not matched)
     * @param value input value (not null)
     * @param replaceBy replacement string ('$' and '\' are special, see Matcher.replaceAll)
     * @return string value
     */
    public static String replaceCR(final String value, final String replaceBy) {
        return PATTERN_CR.matcher(value).replaceAll(replaceBy);
    }

    /**
     * Remove any tag i.e. any text enclosed by the start and end tag characters
     * (shortest match, on a single line as '.' does not match line terminators)
     * @param value input value (not null)
     * @return string value
     */
    public static String removeTags(final String value) {
        return PATTERN_TAGS.matcher(value).replaceAll(STRING_EMPTY);
    }

    /**
     * Encode special characters to entities:
     * {@code &} gives {@code &amp;}, {@code <} gives {@code &lt;} and {@code >} gives {@code &gt;}
     * @param src input string (not null)
     * @return encoded value
     */
    public static String encodeTagContent(final String src) {
        // The ampersand MUST be encoded first: otherwise the '&' introduced
        // by the two next entities would be encoded a second time.
        String out = PATTERN_AMP.matcher(src).replaceAll("&amp;"); // Character [&] (xml restriction)
        out = PATTERN_LT.matcher(out).replaceAll("&lt;"); // Character [<] (xml restriction)
        out = PATTERN_GT.matcher(out).replaceAll("&gt;"); // Character [>] (xml restriction)
        return out;
    }
}
0 commit comments