/**
 * This file Copyright (c) 2003-2018 Magnolia International
 * Ltd. (http://www.magnolia-cms.com). All rights reserved.
 *
 *
 * This file is dual-licensed under both the Magnolia
 * Network Agreement and the GNU General Public License.
 * You may elect to use one or the other of these licenses.
 *
 * This file is distributed in the hope that it will be
 * useful, but AS-IS and WITHOUT ANY WARRANTY; without even the
 * implied warranty of MERCHANTABILITY or FITNESS FOR A
 * PARTICULAR PURPOSE, TITLE, or NONINFRINGEMENT.
 * Redistribution, except as permitted by whichever of the GPL
 * or MNA you select, is prohibited.
 *
 * 1. For the GPL license (GPL), you can redistribute and/or
 * modify this file under the terms of the GNU General
 * Public License, Version 3, as published by the Free Software
 * Foundation. You should have received a copy of the GNU
 * General Public License, Version 3 along with this program;
 * if not, write to the Free Software Foundation, Inc., 51
 * Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * 2. For the Magnolia Network Agreement (MNA), this file
 * and the accompanying materials are made available under the
 * terms of the MNA which accompanies this distribution, and
 * is available at http://www.magnolia-cms.com/mna.html
 *
 * Any modifications to this file must keep this entire header
 * intact.
 *
 */
package info.magnolia.cms.util;

import java.util.regex.Pattern;

import org.apache.commons.lang3.StringUtils;


/**
 * An implementation of {@link UrlPattern} which matches strings using simple <code>*</code> or <code>?</code>
 * wildcards.
 *
 * <p>The simple pattern is translated to a regular expression by {@link #getEncodedString(String)} and compiled
 * with {@link Pattern#DOTALL}; {@link #match(String)} requires the whole input to match.</p>
 */
public final class SimpleUrlPattern implements UrlPattern {

    /**
     * @deprecated URL_CHAR_PATTERN used to be a common prefix for both single/multiple-characters wildcard
     * patterns; before 4.3.1, it used to be a complex white-listing regex. Standalone, it is however of no use.
     */
    @Deprecated
    public static final String URL_CHAR_PATTERN = ".";

    /**
     * Regexp pattern used for the simple keyword <code>*</code>. Matches 0 or more characters.
     */
    public static final String MULTIPLE_CHAR_PATTERN = ".*";

    /**
     * Regexp pattern used for the simple keyword <code>?</code>. Matches 0 or 1 character.
     */
    public static final String SINGLE_CHAR_PATTERN = ".?";

    /**
     * Regexp fragment emitted for a literal <code>.</code> in the simple pattern.
     */
    private static final String ESCAPED_DOT = "\\.";

    /**
     * Regexp pattern used in match(). Stays <code>null</code> until {@link #setPatternString(String)} is called.
     */
    private Pattern pattern;

    /**
     * Pattern length. Longer patterns have higher priority.
     */
    private int length;

    /**
     * Internal pattern string, exactly as passed to {@link #setPatternString(String)}.
     */
    private String patternString;

    /**
     * Default constructor used by ContentToBean. No pattern is compiled until
     * {@link #setPatternString(String)} is called; until then {@link #match(String)} returns <code>false</code>.
     */
    public SimpleUrlPattern() {
    }

    /**
     * Compile a regexp pattern handling <code>*</code> and <code>?</code> chars.
     *
     * @param string input string
     */
    public SimpleUrlPattern(String string) {
        setPatternString(string);
    }

    /**
     * Translates a simple wildcard pattern into a regular expression string:
     * <ul>
     * <li><code>*</code> becomes {@link #MULTIPLE_CHAR_PATTERN},</li>
     * <li><code>?</code> becomes {@link #SINGLE_CHAR_PATTERN},</li>
     * <li><code>.</code> is escaped so that it only matches a literal dot.</li>
     * </ul>
     * The text preceding each of those characters is wrapped in a group (possibly an empty one); the text after
     * the last of them is appended as is. All other characters are copied unchanged, i.e. any other regexp
     * metacharacter keeps its regexp meaning.
     *
     * @param str input string, must not be <code>null</code>
     * @return string where all the occurrences of <code>*</code>, <code>?</code> and <code>.</code> are replaced
     * with a regexp pattern.
     * @throws NullPointerException if <code>str</code> is <code>null</code>
     */
    public static String getEncodedString(String str) {
        final StringBuilder builder = new StringBuilder();
        final char[] chars = str.toCharArray();
        // Index of the first character not yet copied to the builder.
        int last = 0;
        for (int i = 0; i < chars.length; i++) {
            final String replacement = regexFor(chars[i]);
            if (replacement != null) {
                builder.append('(').append(chars, last, i - last).append(')').append(replacement);
                last = i + 1;
            }
        }
        // Trailing text after the last special character is appended without a surrounding group.
        builder.append(chars, last, chars.length - last);
        return builder.toString();
    }

    /**
     * Returns the regexp fragment replacing the given character, or <code>null</code> if the character is copied
     * unchanged.
     */
    private static String regexFor(char c) {
        switch (c) {
            case '*':
                return MULTIPLE_CHAR_PATTERN;
            case '?':
                return SINGLE_CHAR_PATTERN;
            case '.':
                return ESCAPED_DOT;
            default:
                return null;
        }
    }

    /**
     * Checks whether the whole given string matches this pattern.
     *
     * @param str string to test
     * @return <code>true</code> if the entire string matches; <code>false</code> otherwise, including when
     * <code>str</code> is <code>null</code> or when no pattern string has been set yet
     */
    @Override
    public boolean match(String str) {
        // An instance created through the default constructor has no compiled pattern until
        // setPatternString() is called; report "no match" instead of failing with a NullPointerException.
        if (this.pattern == null || str == null) {
            return false;
        }
        return this.pattern.matcher(str).matches();
    }

    /**
     * @return the length of the pattern string with one trailing <code>*</code>, if present, removed
     */
    @Override
    public int getLength() {
        return this.length;
    }

    /**
     * @return the original (simple, non-regexp) pattern string, or <code>null</code> if none has been set
     */
    @Override
    public String getPatternString() {
        return patternString;
    }

    /**
     * Mainly used by ContentToBean. Compiles the given simple pattern and updates the pattern length.
     *
     * @param patternString simple pattern, must not be <code>null</code>
     * @throws NullPointerException if <code>patternString</code> is <code>null</code>
     * @throws java.util.regex.PatternSyntaxException if the translated expression is not a valid regexp
     */
    public void setPatternString(String patternString) {
        if (patternString == null) {
            // Same exception type as before, but raised explicitly, with a message, before any state changes.
            throw new NullPointerException("patternString must not be null");
        }
        // Compile first so that a syntax error leaves the current state of this instance untouched.
        final Pattern compiled = Pattern.compile(getEncodedString(patternString), Pattern.DOTALL);
        this.length = StringUtils.removeEnd(patternString, "*").length();
        this.pattern = compiled;
        this.patternString = patternString;
    }

    @Override
    public String toString() {
        // don't use pattern.pattern(), but keep the original string.
        // The "compiled" pattern will display the ugly patterns like MULTIPLE_CHAR_PATTERN instead of simple *
        return "SimpleUrlPattern{" + patternString + '}';
    }
}