View Javadoc
1   /**
2    * This file Copyright (c) 2012-2018 Magnolia International
3    * Ltd.  (http://www.magnolia-cms.com). All rights reserved.
4    *
5    *
6    * This file is dual-licensed under both the Magnolia
7    * Network Agreement and the GNU General Public License.
8    * You may elect to use one or the other of these licenses.
9    *
10   * This file is distributed in the hope that it will be
11   * useful, but AS-IS and WITHOUT ANY WARRANTY; without even the
12   * implied warranty of MERCHANTABILITY or FITNESS FOR A
13   * PARTICULAR PURPOSE, TITLE, or NONINFRINGEMENT.
14   * Redistribution, except as permitted by whichever of the GPL
15   * or MNA you select, is prohibited.
16   *
17   * 1. For the GPL license (GPL), you can redistribute and/or
18   * modify this file under the terms of the GNU General
19   * Public License, Version 3, as published by the Free Software
20   * Foundation.  You should have received a copy of the GNU
21   * General Public License, Version 3 along with this program;
22   * if not, write to the Free Software Foundation, Inc., 51
23   * Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
24   *
25   * 2. For the Magnolia Network Agreement (MNA), this file
26   * and the accompanying materials are made available under the
27   * terms of the MNA which accompanies this distribution, and
28   * is available at http://www.magnolia-cms.com/mna.html
29   *
30   * Any modifications to this file must keep this entire header
31   * intact.
32   *
33   */
34  package info.magnolia.ui.workbench.search;
35  
36  import info.magnolia.cms.core.Path;
37  import info.magnolia.jcr.util.NodeTypes;
38  import info.magnolia.ui.vaadin.integration.contentconnector.JcrContentConnectorDefinition;
39  import info.magnolia.ui.workbench.container.OrderBy;
40  import info.magnolia.ui.workbench.list.FlatJcrContainer;
41  
42  import java.util.ArrayList;
43  import java.util.LinkedList;
44  import java.util.List;
45  import java.util.regex.Matcher;
46  import java.util.regex.Pattern;
47  
48  import javax.jcr.nodetype.NodeType;
49  
50  import org.apache.commons.lang3.StringUtils;
51  import org.apache.jackrabbit.util.Text;
52  import org.slf4j.Logger;
53  import org.slf4j.LoggerFactory;
54  
55  /**
56   * The jcr container backing the search view. It provides the subset of items returned by the current search. It will include <code>mgnl:folder</code> nodes if the latter are defined as "searchable".
57   *
58   * @see #findSearchableNodeTypes()
59   */
60  public class SearchJcrContainer extends FlatJcrContainer {
61  
62      private static final Logger log = LoggerFactory.getLogger(SearchJcrContainer.class);
63  
64      protected static final String WHERE_TEMPLATE_FOR_SEARCH = "lower(localname()) LIKE '%1$s%%' or " + SELECTOR_NAME + ".['%2$s'] IS NOT NULL %3$s";
65  
66      protected static final String CONTAINS_TEMPLATE_FOR_SEARCH = "contains(" + SELECTOR_NAME + ".*, '%1$s')";
67  
68      protected static final String JCR_SCORE_FUNCTION = "score(" + SELECTOR_NAME + ")";
69  
70      protected static final String JCR_IS_SAME_NODE_FUNCTION = "ISSAMENODE(" + SELECTOR_NAME + ", '%1$s')";
71  
72      private String fullTextExpression;
73  
74      private String whereCauseNodeTypes;
75      /**
76       * Will split a string like the following into simple terms. <em>Get "your facts" first and then "you can distort them" as much "as you please"</em>
77       * <ul>
78       * <li>Get
79       * <li>"your facts"
80       * <li>first
81       * <li>and
82       * <li>then
83       * <li>"you can distort them"
84       * <li>as
85       * <li>much
86       * <li>"as you please"
87       * </ul>
88       */
89      private static final Pattern simpleTermsRegexPattern = Pattern.compile("[^\\s\"']+|\"[^\"]*\"|'[^']*'");
90  
91      public SearchJcrContainer(JcrContentConnectorDefinition definition) {
92          super(definition);
93          whereCauseNodeTypes = super.getQueryWhereClauseNodeTypes();
94  
95          for (NodeType nt : getSearchableNodeTypes()) {
96              // include mgnl:folder if searchable
97              if (NodeTypes.Folder.NAME.equals(nt.getName())) {
98                  whereCauseNodeTypes += " or [jcr:primaryType] = '" + NodeTypes.Folder.NAME + "'";
99                  break;
100             }
101         }
102     }
103 
104     /**
105      * Overrides its default implementation to take further constraints from {@link #getQueryWhereClauseSearch()} into account.
106      */
107     @Override
108     protected String getQueryWhereClause() {
109         final String clauseWorkspacePath = getQueryWhereClauseWorkspacePath();
110         final String whereClauseSearch = getQueryWhereClauseSearch();
111 
112         String whereClause = "(" + getQueryWhereClauseNodeTypes() + ")";
113 
114         if (!"".equals(whereClauseSearch)) {
115             whereClause += " and (" + whereClauseSearch + ") ";
116         }
117 
118         if (!"".equals(clauseWorkspacePath)) {
119             if (!"".equals(whereClause)) {
120                 whereClause = clauseWorkspacePath + " and " + whereClause;
121             } else {
122                 whereClause += clauseWorkspacePath;
123             }
124         }
125 
126         if (!"".equals(whereClause)) {
127             whereClause = " where (" + whereClause + ")";
128         }
129 
130         log.debug("JCR query WHERE clause is {}", whereClause);
131         return whereClause;
132     }
133 
134     @Override
135     protected String getQueryWhereClauseNodeTypes() {
136         return whereCauseNodeTypes;
137     }
138 
139     /**
140      * Builds a string representing the constraints to be applied for this search. Used by the overridden {@link #getQueryWhereClause()} to augment the WHERE clause for this query.
141      * If the given search query string is absolute path (any given string starting with "/"), it adds searching by absolute path, otherwise it basically adds constraints on node names, property names and full-text search on all <code>searchable</code> properties, i.e. those not excluded by Magnolia/JackRabbit's indexing configuration.
142      * <p>
143      * See /magnolia-core/src/main/resources/info/magnolia/jackrabbit/indexing_configuration.xml
144      */
145     protected String getQueryWhereClauseSearch() {
146         if (StringUtils.isBlank(getFullTextExpression())) {
147             return "";
148         }
149         final String unescapedFullTextExpression = getFullTextExpression();
150         final String escapedSearch = Text.escapeIllegalJcrChars(unescapedFullTextExpression);
151         final String escapedSearchLowercase = Text.escapeIllegalJcrChars(unescapedFullTextExpression.toLowerCase());
152         final String escapedFullTextExpression = escapeFullTextExpression(unescapedFullTextExpression);
153 
154         String stmt;
155 
156         // The given search query string starts with "/" is considering as abs path
157         if (Path.isAbsolute(escapedFullTextExpression)) {
158             String rootPath = getConfiguration().getRootPath();
159 
160             if (StringUtils.isEmpty(rootPath) || "/".equals(rootPath) || escapedFullTextExpression.startsWith(rootPath)) {
161                 rootPath = "";
162             }
163 
164             stmt = String.format(JCR_IS_SAME_NODE_FUNCTION, rootPath + escapedFullTextExpression);
165         } else {
166             stmt = String.format(WHERE_TEMPLATE_FOR_SEARCH, escapedSearchLowercase, escapedSearch, String.format("or " + CONTAINS_TEMPLATE_FOR_SEARCH, escapedFullTextExpression));
167         }
168 
169         log.debug("Search where-clause is {}", stmt);
170         return stmt;
171     }
172 
173     public void setFullTextExpression(String fullTextExpression) {
174         this.fullTextExpression = fullTextExpression;
175     }
176 
177     public String getFullTextExpression() {
178         return fullTextExpression;
179     }
180 
181     @Override
182     protected String getJcrNameOrderByFunction() {
183         return JCR_SCORE_FUNCTION;
184     }
185 
186     @Override
187     /**
188      * Order by jcr score descending.
189      */
190     protected OrderBy getDefaultOrderBy(String property) {
191         return new OrderBy(property, false);
192     }
193 
194     /**
195      * See http://wiki.apache.org/jackrabbit/EncodingAndEscaping.
196      */
197     private String escapeFullTextExpression(final String fulltextExpression) {
198         //
199         List<String> matchList = findSimpleTerms(fulltextExpression);
200 
201         final List<String> simpleTerms = new ArrayList<String>();
202         for (String token : matchList) {
203             if ("or".equals(token)) { // yes, Jackrabbit doesn't like lowercase or
204                 simpleTerms.add("OR");
205             } else {
206                 simpleTerms.add(escapeIllegalFullTextSearchChars(token));
207             }
208         }
209         // workaround as our regex does not match one single double quote ["]
210         if ("\"".equals(fullTextExpression)) {
211             simpleTerms.add("\\\"");
212         }
213         String returnValue = StringUtils.join(simpleTerms, " ");
214 
215         return returnValue.replaceAll("'", "''").trim();
216     }
217 
218     /**
219      * @return a list of simple terms according to JCR 2.0 definition, i.e. SimpleTerm ::= Word | '"' Word {Space Word} '"'
220      * (See http://www.day.com/specs/jcr/2.0/6_Query.html#6.7.19%20FullTextSearch)
221      */
222     private List<String> findSimpleTerms(final String unescapedFullTextExpression) {
223         List<String> matchList = new LinkedList<String>();
224         Matcher regexMatcher = simpleTermsRegexPattern.matcher(unescapedFullTextExpression);
225         while (regexMatcher.find()) {
226             matchList.add(regexMatcher.group());
227         }
228         return matchList;
229     }
230 
231     /**
232      * Within a term, each sensitive char must be escaped by a preceding “\”.<br>
233      * - “-” (minus sign), “+” (plus sign) and “\” (backslash) are escaped if they are the single element of the term <br>
234      * - "()[]{}" (all brackets) are always escaped<br>
235      * - “"” (double quote) is always escape unless it delimits a simple term, i.e <code>"foo -bar"</code><br>
236      * <strong>This method has package visibility for testing purposes.</strong>
237      */
238     final String escapeIllegalFullTextSearchChars(final String simpleTerm) {
239         StringBuilder sb = new StringBuilder(simpleTerm.length());
240 
241         for (int i = 0; i < simpleTerm.length(); i++) {
242             char ch = simpleTerm.charAt(i);
243             if (("\\+-".contains(String.valueOf(ch)) && simpleTerm.length() == 1)
244                     || ("()[]{}".contains(String.valueOf(ch)))
245                     || ("\"".contains(String.valueOf(ch)) && (i != 0 && i != simpleTerm.length() - 1))) {
246                 sb.append('\\');
247             }
248             sb.append(ch);
249         }
250         return sb.toString();
251     }
252 
253 }