View Javadoc
1   /**
2    * This file Copyright (c) 2003-2018 Magnolia International
3    * Ltd.  (http://www.magnolia-cms.com). All rights reserved.
4    *
5    *
6    * This file is dual-licensed under both the Magnolia
7    * Network Agreement and the GNU General Public License.
8    * You may elect to use one or the other of these licenses.
9    *
10   * This file is distributed in the hope that it will be
11   * useful, but AS-IS and WITHOUT ANY WARRANTY; without even the
12   * implied warranty of MERCHANTABILITY or FITNESS FOR A
13   * PARTICULAR PURPOSE, TITLE, or NONINFRINGEMENT.
14   * Redistribution, except as permitted by whichever of the GPL
15   * or MNA you select, is prohibited.
16   *
17   * 1. For the GPL license (GPL), you can redistribute and/or
18   * modify this file under the terms of the GNU General
19   * Public License, Version 3, as published by the Free Software
20   * Foundation.  You should have received a copy of the GNU
21   * General Public License, Version 3 along with this program;
22   * if not, write to the Free Software Foundation, Inc., 51
23   * Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
24   *
25   * 2. For the Magnolia Network Agreement (MNA), this file
26   * and the accompanying materials are made available under the
27   * terms of the MNA which accompanies this distribution, and
28   * is available at http://www.magnolia-cms.com/mna.html
29   *
30   * Any modifications to this file must keep this entire header
31   * intact.
32   *
33   */
34  package info.magnolia.cms.util;
35  
36  import java.util.regex.Pattern;
37  
38  import org.apache.commons.lang3.StringUtils;
39  
40  
41  /**
42   * An implementation of URLPattern which matches strings using simple <code>*</code> or <code>?</code> wildcards.
43   */
44  public final class SimpleUrlPattern implements UrlPattern {
45  
46      /**
47       * @deprecated URL_CHAR_PATTERN used to be a common prefix for both single/multiple-characters wildcard patterns—
48       * before 4.3.1, it used to be a complex white-listing regex. Standalone, it is however of no use.
49       */
50      @Deprecated
51      public static final String URL_CHAR_PATTERN = ".";
52  
53      /**
54       * Regexp pattern used for the simple keyword <code>*</code>. Matches 0 or more characters.
55       */
56      public static final String MULTIPLE_CHAR_PATTERN = ".*";
57  
58      /**
59       * Regexp pattern used for the simple keyword <code>?</code>. Matches 0 or 1 character.
60       */
61      public static final String SINGLE_CHAR_PATTERN = ".?";
62  
63      /**
64       * Regexp pattern used in match().
65       */
66      private Pattern pattern;
67  
68      /**
69       * Pattern length. Longer patterns have higher priority.
70       */
71      private int length;
72  
73      /**
74       * internal pattern string.
75       */
76      private String patternString;
77  
78      /**
79       * Default constructor used by ContentToBean.
80       */
81      public SimpleUrlPattern() {
82      }
83  
84      /**
85       * Compile a regexp pattern handling <code>*</code> and <code>?</code> chars.
86       *
87       * @param string input string
88       */
89      public SimpleUrlPattern(String string) {
90          setPatternString(string);
91      }
92  
93      /**
94       * Replace all "*" with <code>RegexWildcardPattern.MULTIPLE_CHAR_PATTERN</code>.
95       *
96       * @param str input string
97       * @return string where all the occurrences of <code>*</code> and <code>?</code> are replaced with a regexp
98       *         pattern.
99       */
100     public static String getEncodedString(String str) {
101         final StringBuilder builder = new StringBuilder();
102         char[] chars = str.toCharArray();
103         int i = 0, last = 0;
104         while (i < chars.length) {
105             char c = chars[i];
106             if (c == '*') {
107                 builder.append('(').append(chars, last, i - last).append(')').append(MULTIPLE_CHAR_PATTERN);
108                 last = i + 1;
109             } else if (c == '?') {
110                 builder.append('(').append(chars, last, i - last).append(')').append(SINGLE_CHAR_PATTERN);
111                 last = i + 1;
112             } else if (c == '.') {
113                 builder.append('(').append(chars, last, i - last).append(')').append("\\.");
114                 last = i + 1;
115             }
116             i++;
117         }
118         builder.append(chars, last, i - last);
119         return builder.toString();
120     }
121 
122     @Override
123     public boolean match(String str) {
124         return this.pattern.matcher(str).matches();
125     }
126 
127     @Override
128     public int getLength() {
129         return this.length;
130     }
131 
132     @Override
133     public String getPatternString() {
134         return patternString;
135     }
136 
137     /**
138      * Mainly used by ContentToBean.
139      */
140     public void setPatternString(String patternString) {
141         this.length = StringUtils.removeEnd(patternString, "*").length();
142         this.pattern = Pattern.compile(getEncodedString(patternString), Pattern.DOTALL);
143         this.patternString = patternString;
144     }
145 
146     @Override
147     public String toString() {
148         // don't use pattern.pattern(), but keep the original string.
149         // The "compiled" pattern will display the ugly patterns like MULTIPLE_CHAR_PATTERN instead of simple *
150         return "SimpleUrlPattern{" + patternString + '}';
151     }
152 }