View Javadoc
1   /**
2    * This file Copyright (c) 2012-2014 Magnolia International
3    * Ltd.  (http://www.magnolia-cms.com). All rights reserved.
4    *
5    *
6    * This file is dual-licensed under both the Magnolia
7    * Network Agreement and the GNU General Public License.
8    * You may elect to use one or the other of these licenses.
9    *
10   * This file is distributed in the hope that it will be
11   * useful, but AS-IS and WITHOUT ANY WARRANTY; without even the
12   * implied warranty of MERCHANTABILITY or FITNESS FOR A
13   * PARTICULAR PURPOSE, TITLE, or NONINFRINGEMENT.
14   * Redistribution, except as permitted by whichever of the GPL
15   * or MNA you select, is prohibited.
16   *
17   * 1. For the GPL license (GPL), you can redistribute and/or
18   * modify this file under the terms of the GNU General
19   * Public License, Version 3, as published by the Free Software
20   * Foundation.  You should have received a copy of the GNU
21   * General Public License, Version 3 along with this program;
22   * if not, write to the Free Software Foundation, Inc., 51
23   * Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
24   *
25   * 2. For the Magnolia Network Agreement (MNA), this file
26   * and the accompanying materials are made available under the
27   * terms of the MNA which accompanies this distribution, and
28   * is available at http://www.magnolia-cms.com/mna.html
29   *
30   * Any modifications to this file must keep this entire header
31   * intact.
32   *
33   */
34  package info.magnolia.ui.workbench.search;
35  
36  import info.magnolia.jcr.util.NodeTypes;
37  import info.magnolia.ui.vaadin.integration.contentconnector.JcrContentConnectorDefinition;
38  import info.magnolia.ui.workbench.container.OrderBy;
39  import info.magnolia.ui.workbench.list.FlatJcrContainer;
40  
41  import java.util.ArrayList;
42  import java.util.LinkedList;
43  import java.util.List;
44  import java.util.regex.Matcher;
45  import java.util.regex.Pattern;
46  
47  import javax.jcr.nodetype.NodeType;
48  
49  import org.apache.commons.lang3.StringUtils;
50  import org.apache.jackrabbit.util.Text;
51  import org.slf4j.Logger;
52  import org.slf4j.LoggerFactory;
53  
54  /**
55   * The jcr container backing the search view. It provides the subset of items returned by the current search. It will include <code>mgnl:folder</code> nodes if the latter are defined as "searchable".
56   *
57   * @see #findSearchableNodeTypes()
58   */
59  public class SearchJcrContainer extends FlatJcrContainer {
60  
61      private static final Logger log = LoggerFactory.getLogger(SearchJcrContainer.class);
62  
63      protected static final String WHERE_TEMPLATE_FOR_SEARCH = "lower(localname()) LIKE '%1$s%%' or " + SELECTOR_NAME + ".['%2$s'] IS NOT NULL %3$s";
64  
65      protected static final String CONTAINS_TEMPLATE_FOR_SEARCH = "contains(" + SELECTOR_NAME + ".*, '%1$s')";
66  
67      protected static final String JCR_SCORE_FUNCTION = "score(" + SELECTOR_NAME + ")";
68  
69      private String fullTextExpression;
70  
71      private String whereCauseNodeTypes;
72      /**
73       * Will split a string like the following into simple terms. <em>Get "your facts" first and then "you can distort them" as much "as you please"</em>
74       * <ul>
75       * <li>Get
76       * <li>"your facts"
77       * <li>first
78       * <li>and
79       * <li>then
80       * <li>"you can distort them"
81       * <li>as
82       * <li>much
83       * <li>"as you please"
84       * </ul>
85       */
86      private static final Pattern simpleTermsRegexPattern = Pattern.compile("[^\\s\"']+|\"[^\"]*\"|'[^']*'");
87  
88      public SearchJcrContainer(JcrContentConnectorDefinition definition) {
89          super(definition);
90          whereCauseNodeTypes = super.getQueryWhereClauseNodeTypes();
91  
92          for (NodeType nt : getSearchableNodeTypes()) {
93              // include mgnl:folder if searchable
94              if (NodeTypes.Folder.NAME.equals(nt.getName())) {
95                  whereCauseNodeTypes += " or [jcr:primaryType] = '" + NodeTypes.Folder.NAME + "'";
96                  break;
97              }
98          }
99      }
100 
101     /**
102      * Overrides its default implementation to take further constraints from {@link #getQueryWhereClauseSearch()} into account.
103      */
104     @Override
105     protected String getQueryWhereClause() {
106         final String clauseWorkspacePath = getQueryWhereClauseWorkspacePath();
107         final String whereClauseSearch = getQueryWhereClauseSearch();
108 
109         String whereClause = "(" + getQueryWhereClauseNodeTypes() + ")";
110 
111         if (!"".equals(whereClauseSearch)) {
112             whereClause += " and (" + whereClauseSearch + ") ";
113         }
114 
115         if (!"".equals(clauseWorkspacePath)) {
116             if (!"".equals(whereClause)) {
117                 whereClause = clauseWorkspacePath + " and " + whereClause;
118             } else {
119                 whereClause += clauseWorkspacePath;
120             }
121         }
122 
123         if (!"".equals(whereClause)) {
124             whereClause = " where (" + whereClause + ")";
125         }
126 
127         log.debug("JCR query WHERE clause is {}", whereClause);
128         return whereClause;
129     }
130 
131     @Override
132     protected String getQueryWhereClauseNodeTypes() {
133         return whereCauseNodeTypes;
134     }
135 
136     /**
137      * Builds a string representing the constraints to be applied for this search. Used by the overridden {@link #getQueryWhereClause()} to augment the WHERE clause for this query.
138      * It basically adds constraints on node names, property names and full-text search on all <code>searchable</code> properties, i.e. those not excluded by Magnolia/JackRabbit's indexing configuration.
139      * <p>
140      * See /magnolia-core/src/main/resources/info/magnolia/jackrabbit/indexing_configuration.xml
141      */
142     protected String getQueryWhereClauseSearch() {
143         if (StringUtils.isBlank(getFullTextExpression())) {
144             return "";
145         }
146         final String unescapedFullTextExpression = getFullTextExpression();
147         final String escapedSearch = Text.escapeIllegalJcrChars(unescapedFullTextExpression);
148         final String escapedSearchLowercase = Text.escapeIllegalJcrChars(unescapedFullTextExpression.toLowerCase());
149 
150         final String stmt = String.format(WHERE_TEMPLATE_FOR_SEARCH, escapedSearchLowercase, escapedSearch, String.format("or " + CONTAINS_TEMPLATE_FOR_SEARCH, escapeFullTextExpression(unescapedFullTextExpression)));
151 
152         log.debug("Search where-clause is {}", stmt);
153         return stmt;
154     }
155 
156     public void setFullTextExpression(String fullTextExpression) {
157         this.fullTextExpression = fullTextExpression;
158     }
159 
160     public String getFullTextExpression() {
161         return fullTextExpression;
162     }
163 
164     @Override
165     protected String getJcrNameOrderByFunction() {
166         return JCR_SCORE_FUNCTION;
167     }
168 
169     @Override
170     /**
171      * Order by jcr score descending.
172      */
173     protected OrderBy getDefaultOrderBy(String property) {
174         return new OrderBy(property, false);
175     }
176 
177     /**
178      * See http://wiki.apache.org/jackrabbit/EncodingAndEscaping.
179      */
180     private String escapeFullTextExpression(final String fulltextExpression) {
181         //
182         List<String> matchList = findSimpleTerms(fulltextExpression);
183 
184         final List<String> simpleTerms = new ArrayList<String>();
185         for (String token : matchList) {
186             if ("or".equals(token)) { // yes, Jackrabbit doesn't like lowercase or
187                 simpleTerms.add("OR");
188             } else {
189                 simpleTerms.add(escapeIllegalFullTextSearchChars(token));
190             }
191         }
192         // workaround as our regex does not match one single double quote ["]
193         if ("\"".equals(fullTextExpression)) {
194             simpleTerms.add("\\\"");
195         }
196         String returnValue = StringUtils.join(simpleTerms, " ");
197 
198         return returnValue.replaceAll("'", "''").trim();
199     }
200 
201     /**
202      * @return a list of simple terms according to JCR 2.0 definition, i.e. SimpleTerm ::= Word | '"' Word {Space Word} '"'
203      * (See http://www.day.com/specs/jcr/2.0/6_Query.html#6.7.19%20FullTextSearch)
204      */
205     private List<String> findSimpleTerms(final String unescapedFullTextExpression) {
206         List<String> matchList = new LinkedList<String>();
207         Matcher regexMatcher = simpleTermsRegexPattern.matcher(unescapedFullTextExpression);
208         while (regexMatcher.find()) {
209             matchList.add(regexMatcher.group());
210         }
211         return matchList;
212     }
213 
214     /**
215      * Within a term, each sensitive char must be escaped by a preceding “\”.<br>
216      * - “-” (minus sign), “+” (plus sign) and “\” (backslash) are escaped if they are the single element of the term <br>
217      * - "()[]{}" (all brackets) are always escaped<br>
218      * - “"” (double quote) is always escape unless it delimits a simple term, i.e <code>"foo -bar"</code><br>
219      * <strong>This method has package visibility for testing purposes.</strong>
220      */
221     final String escapeIllegalFullTextSearchChars(final String simpleTerm) {
222         StringBuilder sb = new StringBuilder(simpleTerm.length());
223 
224         for (int i = 0; i < simpleTerm.length(); i++) {
225             char ch = simpleTerm.charAt(i);
226             if (("\\+-".contains(String.valueOf(ch)) && simpleTerm.length() == 1)
227                     || ("()[]{}".contains(String.valueOf(ch)))
228                     || ("\"".contains(String.valueOf(ch)) && (i != 0 && i != simpleTerm.length() - 1))) {
229                 sb.append('\\');
230             }
231             sb.append(ch);
232         }
233         return sb.toString();
234     }
235 
236 }