View Javadoc
1   /**
2    * This file Copyright (c) 2009-2018 Magnolia International
3    * Ltd.  (http://www.magnolia-cms.com). All rights reserved.
4    *
5    *
6    * This file is dual-licensed under both the Magnolia
7    * Network Agreement and the GNU General Public License.
8    * You may elect to use one or the other of these licenses.
9    *
10   * This file is distributed in the hope that it will be
11   * useful, but AS-IS and WITHOUT ANY WARRANTY; without even the
12   * implied warranty of MERCHANTABILITY or FITNESS FOR A
13   * PARTICULAR PURPOSE, TITLE, or NONINFRINGEMENT.
14   * Redistribution, except as permitted by whichever of the GPL
15   * or MNA you select, is prohibited.
16   *
17   * 1. For the GPL license (GPL), you can redistribute and/or
18   * modify this file under the terms of the GNU General
19   * Public License, Version 3, as published by the Free Software
20   * Foundation.  You should have received a copy of the GNU
21   * General Public License, Version 3 along with this program;
22   * if not, write to the Free Software Foundation, Inc., 51
23   * Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
24   *
25   * 2. For the Magnolia Network Agreement (MNA), this file
26   * and the accompanying materials are made available under the
27   * terms of the MNA which accompanies this distribution, and
28   * is available at http://www.magnolia-cms.com/mna.html
29   *
30   * Any modifications to this file must keep this entire header
31   * intact.
32   *
33   */
34  package info.magnolia.cms.util;
35  
36  import info.magnolia.cms.core.SystemProperty;
37  import info.magnolia.init.MagnoliaConfigurationProperties;
38  import info.magnolia.objectfactory.Components;
39  
40  import java.lang.reflect.InvocationTargetException;
41  import java.lang.reflect.Method;
42  
43  import javax.inject.Singleton;
44  
45  /**
46   * A wrapper around java.text.Normalizer
47   *
48   * <strong>note:</strong> If needed, one can use their own implementation,
49   * by setting the info.magnolia.cms.util.UnicodeNormalizer$Normalizer system property.
50   *
51   * @see java.text.Normalizer
52   * @see <a href="http://en.wikipedia.org/wiki/Unicode_equivalence#Normal_forms">http://en.wikipedia.org/wiki/Unicode_equivalence#Normal_forms</a> for more information.
53   */
54  public class UnicodeNormalizer {
55      private static final org.slf4j.Logger log = org.slf4j.LoggerFactory.getLogger(UnicodeNormalizer.class);
56  
57      private static final String JAVA6_NORMALIZER_CLASS = "java.text.Normalizer";
58      private static final String JAVA6_FORMPARAM_CLASS = "java.text.Normalizer$Form";
59  
60      private static final Normalizer normalizer = Components.getSingleton(Normalizer.class);
61  
62      public static String[] normalizeNFC(String[] values) {
63          if (values == null) {
64              return null;
65          }
66          for (int i = 0; i < values.length; i++) {
67              values[i] = normalizeNFC(values[i]);
68          }
69          return values;
70      }
71  
72      /**
73       * Normalizes the given String to the NFC form.
74       */
75      public static String normalizeNFC(String in) {
76          if (in == null) {
77              return null;
78          }
79          return normalizer.normalizeNFC(in);
80          /* if you're in dire need to debug:
81           try {
82              log.debug("not normalized: " + Arrays.toString(in.getBytes("UTF-8")) + " (" + in + ")");
83              String out = normalizer.normalizeNFC(in);
84              log.debug("    normalized: " + Arrays.toString(out.getBytes("UTF-8")) + " (" + out + ")");
85              return out;
86          } catch (UnsupportedEncodingException e) {
87              // do nothing
88          }
89          return in;
90          */
91      }
92  
93      /**
94       * Used to normalize a String.
95       */
96      public interface Normalizer {
97          String normalizeNFC(String in);
98      }
99  
100     /**
101      * Java 6 Normalizer wrapper.
102      */
103     @Singleton
104     public static final class Java6Normalizer implements Normalizer {
105         private final Method normalize;
106         private final Object nfc;
107 
108         public Java6Normalizer() {
109             try {
110                 final Class<?> normalizer = Class.forName(JAVA6_NORMALIZER_CLASS);
111                 final Class<?> form = Class.forName(JAVA6_FORMPARAM_CLASS);
112                 normalize = normalizer.getMethod("normalize", CharSequence.class, form);
113                 nfc = form.getField("NFC").get(null);
114             } catch (ClassNotFoundException e) {
115                 throw new RuntimeException(e);
116             } catch (IllegalAccessException e) {
117                 throw new RuntimeException(e);
118             } catch (NoSuchFieldException e) {
119                 throw new RuntimeException(e);
120             } catch (NoSuchMethodException e) {
121                 throw new RuntimeException(e);
122             }
123 
124         }
125 
126         @Override
127         public String normalizeNFC(String in) {
128             try {
129                 return (String) normalize.invoke(null, in, nfc);
130             } catch (IllegalAccessException e) {
131                 throw new RuntimeException(e);
132             } catch (InvocationTargetException e) {
133                 throw new RuntimeException(e);
134             }
135         }
136     }
137 
138     /**
139      * Returns the original value unchanged.
140      */
141     @Singleton
142     public static final class NonNormalizer implements UnicodeNormalizer.Normalizer {
143         @Override
144         public String normalizeNFC(String in) {
145             return in;
146         }
147     }
148 
149     /**
150      * Tries to load the normalizer dynamically and respects the property {@link info.magnolia.init.MagnoliaConfigurationProperties#MAGNOLIA_UTF8_ENABLED}.
151      */
152     @Singleton
153     public static final class AutoDetectNormalizer implements Normalizer {
154         private final Normalizer delegate;
155 
156         public AutoDetectNormalizer() {
157             Normalizer candidate;
158             if (SystemProperty.getBooleanProperty(MagnoliaConfigurationProperties.MAGNOLIA_UTF8_ENABLED)) {
159                 candidate = new Java6Normalizer();
160 
161             } else {
162                 candidate = new NonNormalizer();
163             }
164             this.delegate = candidate;
165         }
166 
167         @Override
168         public String normalizeNFC(String in) {
169             return delegate.normalizeNFC(in);
170         }
171     }
172 
173 }