1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34 package info.magnolia.cms.util;
35
36 import info.magnolia.cms.core.SystemProperty;
37 import info.magnolia.objectfactory.Components;
38
39 import java.lang.reflect.InvocationTargetException;
40 import java.lang.reflect.Method;
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58 public class UnicodeNormalizer {
59 private static final org.slf4j.Logger log = org.slf4j.LoggerFactory.getLogger(UnicodeNormalizer.class);
60
61 private static final String JAVA6_NORMALIZER_CLASS = "java.text.Normalizer";
62 private static final String JAVA6_FORMPARAM_CLASS = "java.text.Normalizer$Form";
63 private static final String ICU_NORMALIZER_CLASS = "com.ibm.icu.text.Normalizer";
64
65 private static final Normalizer normalizer = Components.getSingleton(Normalizer.class);
66
67
68
69
70 public static String normalizeNFC(String in) {
71 return normalizer.normalizeNFC(in);
72
73
74
75
76
77
78
79
80
81
82
83 }
84
85 public interface Normalizer {
86 String normalizeNFC(String in);
87 }
88
89
90
91
92
93 public static final class Java6ReflectionNormalizer implements Normalizer {
94 private final Method normalize;
95 private final Object nfc;
96
97 public Java6ReflectionNormalizer() {
98 try {
99 final Class<?> normalizer = Class.forName(JAVA6_NORMALIZER_CLASS);
100 final Class<?> form = Class.forName(JAVA6_FORMPARAM_CLASS);
101 normalize = normalizer.getMethod("normalize", CharSequence.class, form);
102 nfc = form.getField("NFC").get(null);
103 } catch (ClassNotFoundException e) {
104 throw new RuntimeException(e);
105 } catch (IllegalAccessException e) {
106 throw new RuntimeException(e);
107 } catch (NoSuchFieldException e) {
108 throw new RuntimeException(e);
109 } catch (NoSuchMethodException e) {
110 throw new RuntimeException(e);
111 }
112
113 }
114
115 public String normalizeNFC(String in) {
116 try {
117 return (String) normalize.invoke(null, in, nfc);
118 } catch (IllegalAccessException e) {
119 throw new RuntimeException(e);
120 } catch (InvocationTargetException e) {
121 throw new RuntimeException(e);
122 }
123 }
124 }
125
126 public static final class ICUNormalizer implements UnicodeNormalizer.Normalizer {
127 public String normalizeNFC(String in) {
128 return com.ibm.icu.text.Normalizer.normalize(in, com.ibm.icu.text.Normalizer.NFC);
129 }
130 }
131
132 public static final class NonNormalizer implements UnicodeNormalizer.Normalizer {
133 public String normalizeNFC(String in) {
134 return in;
135 }
136 }
137
138 public static final class AutoDetectNormalizer implements Normalizer {
139 private final Normalizer delegate;
140
141 public AutoDetectNormalizer() {
142 Normalizer candidate;
143 if (!SystemProperty.getBooleanProperty(SystemProperty.MAGNOLIA_UTF8_ENABLED)) {
144 candidate = new NonNormalizer();
145 } else {
146 try {
147 Class.forName(JAVA6_NORMALIZER_CLASS);
148 candidate = new Java6ReflectionNormalizer();
149 log.info("Running on Java 6, using {} for unicode form normalization.", candidate.getClass());
150 } catch (ClassNotFoundException e) {
151 log.warn("Not running on Java 6 ({} not found). Attempting to locate the ICU4J library.", JAVA6_NORMALIZER_CLASS);
152 try {
153 Class.forName(ICU_NORMALIZER_CLASS);
154 candidate = new ICUNormalizer();
155 log.info("ICU4J found, using {} for Unicode form normalization.", candidate.getClass());
156 } catch (ClassNotFoundException e2) {
157 log.warn("ICU4J not found ({} not found), Unicode will not be 100% supported; no Unicode form normalization available. If Java 6 is not an option, you can get the ICU4J library from http://www.icu-project.org/.", ICU_NORMALIZER_CLASS);
158 candidate = new NonNormalizer();
159 }
160 }
161 }
162 this.delegate = candidate;
163 }
164
165 public String normalizeNFC(String in) {
166 return delegate.normalizeNFC(in);
167 }
168 }
169
170 }