/**
 * This file Copyright (c) 2015 Magnolia International
 * Ltd. (http://www.magnolia-cms.com). All rights reserved.
 *
 *
 * This file is dual-licensed under both the Magnolia
 * Network Agreement and the GNU General Public License.
 * You may elect to use one or the other of these licenses.
 *
 * This file is distributed in the hope that it will be
 * useful, but AS-IS and WITHOUT ANY WARRANTY; without even the
 * implied warranty of MERCHANTABILITY or FITNESS FOR A
 * PARTICULAR PURPOSE, TITLE, or NONINFRINGEMENT.
 * Redistribution, except as permitted by whichever of the GPL
 * or MNA you select, is prohibited.
 *
 * 1. For the GPL license (GPL), you can redistribute and/or
 * modify this file under the terms of the GNU General
 * Public License, Version 3, as published by the Free Software
 * Foundation. You should have received a copy of the GNU
 * General Public License, Version 3 along with this program;
 * if not, write to the Free Software Foundation, Inc., 51
 * Franklin St, Fifth Floor, Boston, MA 02110-1301 USA.
 *
 * 2. For the Magnolia Network Agreement (MNA), this file
 * and the accompanying materials are made available under the
 * terms of the MNA which accompanies this distribution, and
 * is available at http://www.magnolia-cms.com/mna.html
 *
 * Any modifications to this file must keep this entire header
 * intact.
 *
 */
package info.magnolia.jackrabbit.lucene;

import java.io.IOException;
import java.util.Set;
import java.util.regex.Pattern;

import org.apache.jackrabbit.core.query.lucene.AbstractExcerpt;
import org.apache.jackrabbit.core.query.lucene.DefaultHighlighter;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.TermPositionVector;

/**
 * Provides an HTML excerpt highlighting the searched term. The excerpt text is emitted without XML escaping and
 * JCR identifiers (UUIDs, optionally prefixed with {@code jcr:}) which might have been indexed are removed from it.
 * <p>
 * Suppose you were searching for "foo" and the text found containing it is something like
 *
 * <pre>{@code
 * <p>This is an "excerpt" <em>highlighting</em> the word foo deadbeef-face-babe-cafe-babecafebabe...
 * }</pre>
 *
 * it produces a result like the following which can then be used e.g. in a search result page.
 *
 * <pre>{@code
 * <div class="excerpt">
 * <span class="excerpt-fragment">This is an excerpt highlighting the word <strong>foo</strong>...</span>
 * </div>
 * }</pre>
 *
 * NOTE(review): earlier documentation stated that all HTML tags are stripped as well. No tag stripping is performed
 * by the code in this class, so that presumably happens before the text reaches it; confirm against the indexing setup.
 */
public class SearchHTMLExcerpt extends AbstractExcerpt {

    /** Matches a UUID in its canonical 8-4-4-4-12 hexadecimal form, optionally preceded by {@code jcr:}. */
    private static final String UUID_REGEX = "(jcr:)?[a-fA-F0-9]{8}-[a-fA-F0-9]{4}-[a-fA-F0-9]{4}-[a-fA-F0-9]{4}-[a-fA-F0-9]{12}";

    /** Compiled once so that the expression is not re-compiled for every excerpt. */
    private static final Pattern UUID_PATTERN = Pattern.compile(UUID_REGEX);

    private final NoXMLEscapeHighlighter highlighter = new NoXMLEscapeHighlighter();

    /**
     * Builds the highlighted HTML excerpt for the given text and removes any identifiers from the result.
     *
     * @param tpv the term position vector handed to the highlighter
     * @param text the text the excerpt is created from
     * @param maxFragments the maximum number of fragments, passed through to the highlighter
     * @param maxFragmentSize the maximum fragment size; half of this value is passed to the highlighter as the
     * amount of surrounding text
     * @return the excerpt markup with all matches of the identifier pattern removed
     * @throws IOException if the highlighter fails
     */
    @Override
    protected String createExcerpt(TermPositionVector tpv, String text, int maxFragments, int maxFragmentSize) throws IOException {

        final String excerpt = highlighter.doHighlight(tpv, getQueryTerms(), text,
                "<div class=\"excerpt\">", "</div>", "<span class=\"excerpt-fragment\">", "</span>", "<strong>", "</strong>",
                maxFragments, maxFragmentSize / 2);

        // need to strip ids after the highlighting else the latter, for some reason, doesn't match query terms correctly.
        return UUID_PATTERN.matcher(excerpt).replaceAll("");
    }

    /**
     * Unlike {@link DefaultHighlighter} this one does not XML escape its string input.
     * The XML escaping done by DefaultHighlighter escapes HTML tags and quotes (") with HTML entities,
     * e.g. {@code <p>} becomes {@code &lt;p&gt;} which is ugly to see e.g. in a search result page.
     * <p>
     * Declared static because it does not use any state of the enclosing instance.
     */
    private static final class NoXMLEscapeHighlighter extends DefaultHighlighter {

        /**
         * @return raw input
         */
        @Override
        public String escape(String input) {
            return input;
        }

        /**
         * We need to override this and call it explicitly as otherwise our {@link #escape(String)} method would never be called,
         * see {@link DefaultHighlighter#highlight(TermPositionVector, Set, String, String, String, String, String, String, String, int, int)} line #109.
         * <p>
         * NOTE(review): re-declaring the method here presumably is also what makes it callable from the enclosing class; confirm before removing.
         */
        @Override
        protected String doHighlight(TermPositionVector tvec, Set<Term[]> queryTerms, String text, String excerptStart, String excerptEnd, String fragmentStart, String fragmentEnd, String hlStart, String hlEnd, int maxFragments, int surround) throws IOException {
            return super.doHighlight(tvec, queryTerms, text, excerptStart, excerptEnd, fragmentStart, fragmentEnd, hlStart, hlEnd, maxFragments, surround);
        }
    }
}